Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Kconfig =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/Kconfig 2004-11-11 10:28:08.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Kconfig 2004-12-17 12:45:23.000000000 -0500 @@ -6,6 +6,8 @@ source "drivers/mtd/Kconfig" +source "drivers/bluesmoke/Kconfig" + source "drivers/parport/Kconfig" source "drivers/pnp/Kconfig" Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/Makefile 2004-11-11 10:28:16.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile 2004-12-17 12:45:23.000000000 -0500 @@ -29,6 +29,7 @@ obj-$(CONFIG_IEEE1394) += ieee1394/ obj-y += cdrom/ video/ obj-$(CONFIG_MTD) += mtd/ +obj-$(CONFIG_BLUESMOKE) += bluesmoke/ obj-$(CONFIG_PCMCIA) += pcmcia/ obj-$(CONFIG_DIO) += dio/ obj-$(CONFIG_SBUS) += sbus/ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Kconfig =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/Kconfig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Kconfig 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,72 @@ +# +# Bluesmoke Kconfig +# Copyright (c) 2003 Linux Networx +# Licensed and distributed under the GPL +# +# $Id: Kconfig,v 1.4 2004/11/10 01:12:35 thayne Exp $ +# + +menu 'Bluesmoke - error detection and reporting (RAS)' + +config BLUESMOKE + tristate "Bluesmoke core system error reporting" + help + Bluesmoke is designed to report errors in the core system. + These are low-level errors that are reported in the CPU or + supporting chipset: memory errors, cache errors, PCI errors, + thermal throttling, etc.. If unsure, select 'Y'. 
+ + +comment "Reporting subsystems" + depends on BLUESMOKE + +config BLUESMOKE_DEBUG + bool "Debugging" + depends on BLUESMOKE + help + This turns on debugging information for the entire Bluesmoke + sub-system. Usually you should select 'N'. + +config BLUESMOKE_DEBUG_VERBOSE + int "Debugging verbosity (0=quiet, 3=noisy)" + depends on BLUESMOKE_DEBUG + default "0" + help + Verbosity level of Bluesmoke debug messages. + +config BLUESMOKE_MM_EDAC + tristate "Bluesmoke Main Memory EDAC (Error Detection And Correction) reporting" + depends on BLUESMOKE + help + Some systems are able to detect and correct errors in main + memory. Bluesmoke can report statistics on memory error + detection and correction (EDAC - or commonly referred to ECC + errors). Bluesmoke will also try to decode where these errors + occurred so that a particular failing memory module can be + replaced. If unsure, select 'Y'. + + +comment "Bluesmoke system controller/chipset support" + depends on BLUESMOKE + +config BLUESMOKE_AMD76X + tristate "AMD 76x (760, 762, 768)" + depends on BLUESMOKE + +config BLUESMOKE_E7XXX + tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" + depends on BLUESMOKE + +config BLUESMOKE_E752X + tristate "Intel e752x (e7520)" + depends on BLUESMOKE + +config BLUESMOKE_I82875P + tristate "Intel 82875p" + depends on BLUESMOKE + +config BLUESMOKE_K8 + tristate "AMD K8 (Athlon FX, Athlon 64, Opteron)" + depends on BLUESMOKE + +endmenu Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Makefile =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Makefile 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,26 @@ +# +# Makefile for the Linux kernel bluesmoke drivers. 
+# +# Copyright 02 Jul 2003, Linux Networx (http://lnxi.com) +# This file may be distributed under the terms of the +# GNU General Public License. +# +# $Id: Makefile,v 1.4 2004/11/10 01:12:35 thayne Exp $ + + +obj-$(CONFIG_BLUESMOKE_MM_EDAC) += bluesmoke_mc.o +obj-$(CONFIG_BLUESMOKE_AMD76X) += bluesmoke_amd76x.o +obj-$(CONFIG_BLUESMOKE_E7XXX) += bluesmoke_e7xxx.o +obj-$(CONFIG_BLUESMOKE_E752X) += bluesmoke_e752x.o +obj-$(CONFIG_BLUESMOKE_I82875P) += bluesmoke_i82875p.o +obj-$(CONFIG_BLUESMOKE_K8) += bluesmoke_k8.o + +ifeq ($(PATCHLEVEL),4) + +export-objs := bluesmoke_mc.o + +O_TARGET := bluesmokelink.o + +include $(TOPDIR)/Rules.make + +endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_amd76x.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_amd76x.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_amd76x.c 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,323 @@ +/* + * AMD 76x Memory Controller kernel module + * (C) 2003 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Thayne Harbaugh + * Based on work by Dan Hollis and others. 
+ * http://www.anime.net/~goemon/linux-ecc/ + * + * $Id: bluesmoke_amd76x.c,v 1.4 2004/11/10 01:12:35 thayne Exp $ + * + */ + + +#include +#include +#include + +#include +#include + +#include + +#include "bluesmoke_mc.h" + + +#define AMD76X_NR_CSROWS 8 +#define AMD76X_NR_CHANS 1 +#define AMD76X_NR_DIMMS 4 + + +/* AMD 76x register addresses - device 0 function 0 - PCI bridge */ +#define AMD76X_ECC_MODE_STATUS 0x48 /* Mode and status of ECC (32b) + * + * 31:16 reserved + * 15:14 SERR enabled: x1=ue 1x=ce + * 13 reserved + * 12 diag: disabled, enabled + * 11:10 mode: dis, EC, ECC, ECC+scrub + * 9:8 status: x1=ue 1x=ce + * 7:4 UE cs row + * 3:0 CE cs row + */ +#define AMD76X_DRAM_MODE_STATUS 0x58 /* DRAM Mode and status (32b) + * + * 31:26 clock disable 5 - 0 + * 25 SDRAM init + * 24 reserved + * 23 mode register service + * 22:21 suspend to RAM + * 20 burst refresh enable + * 19 refresh disable + * 18 reserved + * 17:16 cycles-per-refresh + * 15:8 reserved + * 7:0 x4 mode enable 7 - 0 + */ +#define AMD76X_MEM_BASE_ADDR 0xC0 /* Memory base address (8 x 32b) + * + * 31:23 chip-select base + * 22:16 reserved + * 15:7 chip-select mask + * 6:3 reserved + * 2:1 address mode + * 0 chip-select enable + */ + + +enum amd76x_chips { + AMD761 = 0, + AMD762 +}; + + +struct amd76x_dev_info { + const char *ctl_name; +}; + + +static const struct amd76x_dev_info amd76x_devs[] = { + [AMD761] = { + .ctl_name = "AMD761" + }, + [AMD762] = { + .ctl_name = "AMD762" + }, +}; + + +static void amd76x_check(struct mem_ctl_info *mci) +{ + u32 ems; + + debugf1( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); + + pci_read_config_dword(mci->pdev, AMD76X_ECC_MODE_STATUS, &ems); + + if ( ems & BIT(8) ) { /* UE? */ + u32 ems_ue_row = (ems >> 4) & 0xf; + + pci_write_bits32( mci->pdev, AMD76X_ECC_MODE_STATUS, + (u32)BIT(8), (u32)BIT(8) ); + + bluesmoke_mc_handle_ue( mci, mci->csrows[ems_ue_row].first_page, + 0, ems_ue_row, mci->ctl_name ); + } + + if ( ems & BIT(9) ) { /* CE? 
*/ + u32 ems_ce_row = ems & 0xf; + + pci_write_bits32( mci->pdev, AMD76X_ECC_MODE_STATUS, + (u32)BIT(9), (u32)BIT(9) ); + + bluesmoke_mc_handle_ce( mci, mci->csrows[ems_ce_row].first_page, + 0, 0, ems_ce_row, 0, mci->ctl_name ); + } + return; +} + + +static int amd76x_probe1( struct pci_dev *pdev, int dev_idx ) +{ + int rc = -ENODEV; + int index; + struct mem_ctl_info *mci = NULL; + enum edac_type ems_modes[] = { EDAC_NONE, + EDAC_EC, + EDAC_SECDED, + EDAC_SECDED }; + u32 ems; + u32 ems_mode; + + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems); + ems_mode = ( ems >> 10 ) & 0x3; + + mci = bluesmoke_mc_init_structs(0, + AMD76X_NR_CSROWS, + AMD76X_NR_CHANS); + + if ( ! mci ) { + rc = -ENOMEM; + goto FAIL_FINISHED; + } + + debugf0( "MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci ); + + mci->pdev = pdev; + mci->mtype_cap = MEM_FLAG_RDDR; + + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; + if ( ems_mode ) { + mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED; + } else { + mci->edac_cap = EDAC_FLAG_NONE; + } + + mci->mod_name = BS_MOD_STR; + mci->mod_ver = "$Revision: 1.4 $"; + mci->ctl_name = amd76x_devs[dev_idx].ctl_name; + mci->edac_check = amd76x_check; + mci->clear_err = NULL; + mci->ctl_page_to_phys = NULL; + + for ( index = 0; index < mci->nr_csrows; index++ ) { + struct csrow_info *csrow = &mci->csrows[ index ]; + u32 mba; + u32 mba_base; + u32 mba_mask; + u32 dms; + + /* find the DRAM Chip Select Base address and mask */ + pci_read_config_dword( mci->pdev, + AMD76X_MEM_BASE_ADDR + (index*4), + &mba ); + + if ( ! 
(mba & BIT(0)) ) { + continue; + } + + mba_base = mba & 0xff800000UL; + mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL; + + pci_read_config_dword( mci->pdev, + AMD76X_DRAM_MODE_STATUS, + &dms ); + + csrow->first_page = mba_base >> PAGE_SHIFT; + csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + csrow->page_mask = mba_mask >> PAGE_SHIFT; + csrow->grain = csrow->nr_pages << PAGE_SHIFT; + csrow->mtype = MEM_RDDR; + csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN; + csrow->edac_mode = ems_modes[ ems_mode ]; + } + + /* clear counters */ + pci_write_bits32( mci->pdev, AMD76X_ECC_MODE_STATUS, + (u32)(0x3 << 8), (u32)(0x3 << 8) ); + + if ( 0 != bluesmoke_mc_add_mc( mci ) ) { + debugf3( "MC: " __FILE__ + ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); + goto FAIL_FINISHED; + } + + /* get this far and it's successful */ + debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); + rc = 0; + goto FINISHED; + + FAIL_FINISHED: + if ( mci ) { + kfree( mci ); + } + + FINISHED: + return( rc ); +} + + +#ifdef CONFIG_PM + +static int amd76x_suspend (struct pci_dev *pdev, u32 state) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + + +static int amd76x_resume (struct pci_dev *pdev) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + +#endif /* CONFIG_PM */ + + +/* returns count (>= 0), or negative on error */ +static int __devinit amd76x_init_one( struct pci_dev *pdev, + const struct pci_device_id *ent ) +{ + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + /* don't need to call pci_device_enable() */ + return amd76x_probe1( pdev, ent->driver_data ); +} + + +static void __devexit amd76x_remove_one( struct pci_dev *pdev ) +{ + struct mem_ctl_info *mci; + + debugf0( __FILE__ ": %s()\n", __func__); + + if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { + goto FINISHED; + } + + if ( 0 != bluesmoke_mc_del_mc( mci ) ) { + 
goto FINISHED; + } + + kfree( mci ); + + FINISHED: + return; +} + + +static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { + { PCI_VEND_DEV( AMD, FE_GATE_700C ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762 }, + { PCI_VEND_DEV( AMD, FE_GATE_700E ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD761 }, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); + + +static struct pci_driver amd76x_driver = { + .name = BS_MOD_STR, + .probe = amd76x_init_one, + .remove = __devexit_p(amd76x_remove_one), + .id_table = amd76x_pci_tbl, +#ifdef CONFIG_PM + .suspend = amd76x_suspend, + .resume = amd76x_resume, +#endif /* CONFIG_PM */ +}; + + +int __init amd76x_init(void) +{ + int pci_rc; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_rc = pci_module_init( &amd76x_driver ); + if ( pci_rc < 0 ) return pci_rc; + + return 0; +} + + +static void __exit amd76x_exit(void) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_unregister_driver( &amd76x_driver ); +} + + +module_init(amd76x_init); +module_exit(amd76x_exit); + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); +MODULE_DESCRIPTION("MC support for AMD 76x memory controllers"); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e752x.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_e752x.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e752x.c 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,1027 @@ +/* + * Intel e752x Memory Controller kernel module + * (C) 2004 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. 
+ * + * See "enum e752x_chips" below for supported chipsets + * + * Written by Tom Zimmerman + * + * Contributors: + * Thayne Harbaugh (Linux Networx) + * + * $Id: bluesmoke_e752x.c,v 1.5 2004/11/18 22:19:46 thayne Exp $ + * + */ + + +#include +#include +#include + +#include +#include + +#include + +#include "bluesmoke_mc.h" + + +#ifndef PCI_DEVICE_ID_INTEL_7520_0 +#define PCI_DEVICE_ID_INTEL_7520_0 0x3590 +#endif /* PCI_DEVICE_ID_INTEL_7520_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_7520_1_ERR +#define PCI_DEVICE_ID_INTEL_7520_1_ERR 0x3591 +#endif /* PCI_DEVICE_ID_INTEL_7520_1_ERR */ + + +#define E752X_NR_CSROWS 8 /* number of csrows */ + + +/* E752X register addresses - device 0 function 0 */ +#define E752X_DRB 0x60 /* DRAM row boundary register (8b) */ +#define E752X_DRA 0x70 /* DRAM row attribute register (32b) */ + /* + * 31:30 Device width row 7 + * 01=x8 10=x4 11=x8 DDR2 + * 27:26 Device width row 6 + * 23:22 Device width row 5 + * 19:18 Device width row 4 + * 15:14 Device width row 3 + * 11:10 Device width row 2 + * 7:6 Device width row 1 + * 3:2 Device width row 0 + */ +#define E752X_DRC 0x7C /* DRAM controller mode reg (32b) */ + /* + * 22 Number channels 0=1,1=2 + * 19:18 DRB Granularity 32/64MB + */ +#define E752X_DRM 0x80 /* Dimm mapping register */ +#define E752X_DDRCSR 0x9A /* DDR control and status reg (16b) */ + /* + * 14:12 1 single A, 2 single B, 3 dual + */ +#define E752X_TOLM 0xC4 /* DRAM top of low memory reg (16b) */ +#define E752X_REMAPBASE 0xC6 /* DRAM remap base address reg (16b) */ +#define E752X_REMAPLIMIT 0xC8 /* DRAM remap limit address reg (16b) */ +#define E752X_REMAPOFFSET 0xCA /* DRAM remap limit offset reg (16b) */ + +/* E752X register addresses - device 0 function 1 */ +#define E752X_FERR_GLOBAL 0x40 /* Global first error register (32b)*/ +#define E752X_NERR_GLOBAL 0x44 /* Global next error register (32b) */ +#define E752X_HI_FERR 0x50 /* Hub interface first error reg (8b)*/ +#define E752X_HI_NERR 0x52 /* Hub interface next error reg 
(8b)*/ +#define E752X_HI_ERRMASK 0x54 /* Hub interface error mask reg (8b)*/ +#define E752X_HI_SMICMD 0x5A /* Hub interface SMI command reg (8b)*/ +#define E752X_SYSBUS_FERR 0x60 /* System bus first error reg (16b)*/ +#define E752X_SYSBUS_NERR 0x62 /* System bus next error reg (16b)*/ +#define E752X_SYSBUS_ERRMASK 0x64 /* System bus error mask reg (16b) */ +#define E752X_SYSBUS_SMICMD 0x6A /* System bus SMI command reg (16b) */ +#define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b)*/ +#define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b)*/ +#define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b)*/ +#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI command reg (8b)*/ +#define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */ +#define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */ +#define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */ +#define E752X_DRAM_SMICMD 0x8A /* DRAM SMI command register (8b) */ +#define E752X_DRAM_RETRY_ADD 0xAC /* DRAM Retry address register (32b) */ +#define E752X_DRAM_CELOG1_ADD 0xA0 /* DRAM first correctable memory */ + /* error address register (32b) */ + /* + * 31 Reserved + * 30:2 CE address (64 byte block 34:6) + * 1 Reserved + * 0 HiLoCS + */ +#define E752X_DRAM_CELOG2_ADD 0xC8 /* DRAM second correctable memory */ + /* error address register (32b) */ + /* + * 31 Reserved + * 30:2 CE address (64 byte block 34:6) + * 1 Reserved + * 0 HiLoCS + */ +#define E752X_DRAM_UELOG_ADD 0xA4 /* DRAM first uncorrectable memory */ + /* error address register (32b) */ + /* + * 31 Reserved + * 30:2 UE address (64 byte block 34:6) + * 1 Reserved + * 0 HiLoCS + */ +#define E752X_DRAM_UELOGS_ADD 0xA8 /* DRAM first uncorrectable scrub memory */ + /* error address register (32b) */ + /* + * 31 Reserved + * 30:2 UE address (64 byte block 34:6) + * 1 Reserved + * 0 HiLoCS + */ +#define E752X_DRAM_CELOG1_SYNDROME 0xC4 /* DRAM first correctable memory */ + /* error syndrome register (16b) 
*/ +#define E752X_DRAM_CELOG2_SYNDROME 0xC6 /* DRAM second correctable memory */ + /* error syndrome register (16b) */ +#define E752X_DEVPRES1 0xF4 /* Device Present 1 register (8b) */ + +/* ICH5R register addresses - device 30 function 0 */ +#define ICH5R_PCI_STAT 0x06 /* PCI status register (16b) */ +#define ICH5R_PCI_2ND_STAT 0x1E /* PCI status secondary reg (16b) */ +#define ICH5R_PCI_BRIDGE_CTL 0x3E /* PCI bridge control register (16b) */ + +enum e752x_chips { + E7520 = 0, +}; + + +struct e752x_pvt { + struct pci_dev *bridge_ck; + struct pci_dev *dev_d0f0; + struct pci_dev *dev_d0f1; + u32 tolm; + u32 remapbase; + u32 remaplimit; + int mc_symmetric; + u8 map[8]; + int map_type; + const struct e752x_dev_info *dev_info; +}; + + +struct e752x_dev_info { + u16 err_dev; + const char *ctl_name; +}; + + +static const struct e752x_dev_info e752x_devs[] = { + [E7520] = { + .err_dev = PCI_DEVICE_ID_INTEL_7520_1_ERR, + .ctl_name = "E7520" + }, +}; + + +/* FIXME - is this valid for both SECDED and S4ECD4ED? 
*/ +static inline int e752x_find_channel(u16 syndrome) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if((syndrome & 0xff00)==0) + return(0); + if((syndrome & 0x00ff)==0) + return(1); + if((syndrome & 0xf000)==0) + return(0); + if((syndrome & 0x0f00)==0) + return(0); + return(1); +} + + +static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, + unsigned long page) +{ + u32 remap; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if(page < pvt->tolm) + return(page); + if((page >= 0x100000)&&(page < pvt->remapbase)) + return(page); + remap = (page - pvt->tolm) + pvt->remapbase; + if(remap < pvt->remaplimit) + return(remap); + printk(KERN_ERR "Invalid page %lx - out of range\n", page); + return(pvt->tolm-1); +} + + +static void process_ce(struct mem_ctl_info *mci, u16 error_one, + u32 celog1_add, u16 celog1_syndrome) +{ + u32 error_1b, page; + u16 syndrome; + int row; + int channel; + int i; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if(error_one&0x0101) { + /* read the error address */ +// pci_read_config_dword(pvt->bridge_ck,E752X_DRAM_CELOG1_ADD, +// &error_1b); + error_1b = celog1_add; + page = error_1b >> (PAGE_SHIFT-4); /* convert the addr to 4k page */ + /* read the syndrome */ +// pci_read_config_word(pvt->bridge_ck,E752X_DRAM_CELOG1_SYNDROME, +// &syndrome); + syndrome = celog1_syndrome; + /* FIXME - check for -1 */ + if (pvt->mc_symmetric) { + row = ((page >>1)&3); /* chip select are bits 14 & 13 */ + printk( KERN_WARNING + "Test row %d Table %d %d %d %d %d %d %d %d\n", + row,pvt->map[0],pvt->map[1],pvt->map[2],pvt->map[3],pvt->map[4], + pvt->map[5],pvt->map[6],pvt->map[7]); + + /* test for channel remapping */ + for(i=0;i<8;i++) { + if(pvt->map[i] == row) + break; + } + printk( KERN_WARNING + "Test computed row %d\n",i); + if(i<8) { + row = i; + } + else { + printk( KERN_WARNING + "MC%d: row %d not 
found in remap table\n", + mci->mc_idx,row); + } + + + } else { + row = bluesmoke_mc_find_csrow_by_page( mci, page ); + } + if(error_one&1) + channel = 0; /* 0 = channel A */ + else + channel = 1; /* 1 = channel B */ + + if(!pvt->map_type) + row = 7 - row; + bluesmoke_mc_handle_ce( mci, page, 0, syndrome, + row, channel, "e752x CE" ); + } +} + + +static void process_ue(struct mem_ctl_info *mci, u16 error_one, + u32 uelog_add, u32 uelogs_add) +{ + u32 error_2b, block_page; + int row; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if(error_one & 0x0202) { + error_2b = uelog_add; + /* convert to 4k address */ + block_page = error_2b >> (PAGE_SHIFT - 4); + if (pvt->mc_symmetric) { + /* chip select are bits 14 & 13 */ + row = ((block_page >>1)&3); + } + else { + row = bluesmoke_mc_find_csrow_by_page(mci, block_page); + } + bluesmoke_mc_handle_ue( mci, block_page, 0, row, + "e752x UE from Read" ); + } + if(error_one & 0x0404) { + error_2b = uelogs_add; + /* convert to 4k address */ + block_page = error_2b >> (PAGE_SHIFT - 4); + if (pvt->mc_symmetric) { + /* chip select are bits 14 & 13 */ + row = ((block_page >>1)&3); + } + else { + row = bluesmoke_mc_find_csrow_by_page(mci, block_page); + } + bluesmoke_mc_handle_ue( mci, block_page, 0, row, + "e752x UE from Scruber" ); + } +} + +#if 0 +static void process_ue_no_info(struct mem_ctl_info *mci) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + bluesmoke_mc_handle_ue_no_info( mci, "e752x UE log register overflow" ); +} +#endif + +static void process_ue_no_info_wr(struct mem_ctl_info *mci) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + bluesmoke_mc_handle_ue_no_info( mci, "e752x UE log memory write" ); +} + +static void process_ded_retry(struct mem_ctl_info *mci,u16 error,u32 retry_add) +{ + u32 error_1b, page; + int row; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; + + error_1b = retry_add; + page = error_1b >> 
(PAGE_SHIFT-4); /* convert the addr to 4k page */ + if (pvt->mc_symmetric) { + row = ((page >>1)&3); /* chip select are bits 14 & 13 */ + } else { + row = bluesmoke_mc_find_csrow_by_page( mci, page ); + } + printk( KERN_WARNING + "MC%d: CE page 0x%lx, row %d : Memory read retry\n", + mci->mc_idx,(long unsigned int)page,row); +} + +static void process_threshold_ce(struct mem_ctl_info *mci,u16 error) +{ + printk( KERN_WARNING + "MC%d: Memory threshold CE\n",mci->mc_idx); +} + +char *global_message[11]= {"PCI Express C1","PCI Express C","PCI Express B1", + "PCI Express B","PCI Express A1","PCI Express A", + "DMA Controler","HUB Interface","System Bus", + "DRAM Controler","Internal Buffer"}; +char *fatal_message[2]={"Non-Fatal ","Fatal "}; + +static void global_error(int fatal, u32 errors) +{ + int i; + + for(i=0;i<11;i++) { + if(errors & (1<pvt_info; + struct pci_dev *pres_dev; + struct pci_dev *dev; + + /* clear snapshot */ + hi_ferr=hi_nerr=buf_ferr=buf_nerr=0; + sysbus_ferr=sysbus_nerr=dram_ferr=dram_nerr=0; + celog1_syndrome=celog2_syndrome=retry_add=0; + celog1_add=celog2_add=uelog_add=uelogs_add=0; + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if (pvt->dev_d0f1 != NULL) { + dev = pvt->dev_d0f1; + pci_read_config_dword(dev,E752X_FERR_GLOBAL,&stat32); + if(stat32) { /* Error, so process */ +#if 1 + /* dump d0f0 and d0f1 */ + printk("\nDevice 0 Function 0"); + for(i=0;i<0x100;i++) { + pci_read_config_byte(mci->pdev,i,&stat8); + if((i%16)==0) { + printk("\n%2.2x ",i); + } + printk("%2.2x ",stat8); + } + printk("\n"); + /* dump d0f0 and d0f1 */ + printk("\nDevice 0 Function 1"); + for(i=0;i<0x100;i++) { + pci_read_config_byte(dev,i,&stat8); + if((i%16)==0) { + printk("\n%2.2x ",i); + } + printk("%2.2x ",stat8); + } + printk("\n\n"); +#endif + /* take a snap shot of first errors */ + pci_read_config_byte(dev,E752X_HI_FERR,&hi_ferr); + pci_read_config_word(dev,E752X_SYSBUS_FERR,&sysbus_ferr); + pci_read_config_byte(dev,E752X_BUF_FERR,&buf_ferr); + 
pci_read_config_word(dev,E752X_DRAM_FERR,&dram_ferr); + pci_read_config_dword(dev,E752X_DRAM_CELOG1_ADD, + &celog1_add); + pci_read_config_word(dev,E752X_DRAM_CELOG1_SYNDROME, + &celog1_syndrome); + pci_read_config_dword(dev,E752X_DRAM_UELOG_ADD, + &uelog_add); + pci_read_config_dword(dev,E752X_DRAM_UELOGS_ADD, + &uelogs_add); + pci_read_config_dword(dev,E752X_DRAM_RETRY_ADD, + &retry_add); + + pci_write_config_dword(dev,E752X_FERR_GLOBAL,stat32); + error32=(stat32>>18)&0x3ff; + stat32=(stat32>>4)&0x7ff; + if(error32) + global_error(1,error32); + if(stat32) + global_error(0,stat32); + } + + pci_read_config_dword(dev,E752X_NERR_GLOBAL,&stat32); + if(stat32) { /* Error, so process */ + /* take a snap shot of second errors */ + pci_read_config_byte(dev,E752X_HI_NERR,&hi_nerr); + pci_read_config_word(dev,E752X_SYSBUS_NERR,&sysbus_nerr); + pci_read_config_byte(dev,E752X_BUF_NERR,&buf_nerr); + pci_read_config_word(dev,E752X_DRAM_NERR,&dram_nerr); + pci_read_config_dword(dev,E752X_DRAM_CELOG2_ADD, + &celog2_add); + pci_read_config_word(dev,E752X_DRAM_CELOG2_SYNDROME, + &celog2_syndrome); + + pci_write_config_dword(dev,E752X_NERR_GLOBAL,stat32); + error32=(stat32>>18)&0x3ff; + stat32=(stat32>>4)&0x7ff; + if(error32) + global_error(1,error32); + if(stat32) + global_error(0,stat32); + } + +// pci_read_config_byte(dev,E752X_HI_FERR,&stat8); + stat8=hi_ferr; + if(stat8&0x7f) { /* Error, so process */ + pci_write_config_dword(dev,E752X_HI_FERR,stat8); + stat8 &= 0x7f; + if(stat8&0x2b) + hub_error(1,(stat8&0x2b)); + if(stat8 & 0x54) + hub_error(0,(stat8&0x54)); + } +// pci_read_config_byte(dev,E752X_HI_NERR,&stat8); + stat8=hi_nerr; + if(stat8&0x7f) { /* Error, so process */ + pci_write_config_dword(dev,E752X_HI_NERR,stat8); + stat8 &= 0x7f; + if(stat8&0x2b) + hub_error(1,(stat8&0x2b)); + if(stat8 & 0x54) + hub_error(0,(stat8&0x54)); + } +// pci_read_config_dword(dev,E752X_SYSBUS_FERR,&stat32); + stat32 = sysbus_ferr + (sysbus_nerr <<16); + if(stat32) { /* Error, so process */ + 
pci_write_config_dword(dev,E752X_SYSBUS_FERR,stat32); + error32=(stat32>>16)&0x3ff; + stat32=stat32&0x3ff; + if(stat32 & 0x083) + sysbus_error(1,(stat32&0x083)); + if(stat32 & 0x37c) + sysbus_error(0,(stat32&0x37c)); + if(error32 & 0x083) + sysbus_error(1,(error32&0x083)); + if(error32 & 0x37c) + sysbus_error(0,(error32&0x37c)); + } +// pci_read_config_byte(dev,E752X_BUF_FERR,&stat8); + stat8 = buf_ferr; + if(stat8&0x0f) { /* Error, so process */ + pci_write_config_dword(dev,E752X_BUF_FERR,stat8); + stat8 &= 0x0f; + membuf_error(stat8); + } +// pci_read_config_byte(dev,E752X_BUF_NERR,&stat8); + stat8 = buf_nerr; + if(stat8&0x0f) { /* Error, so process */ + pci_write_config_dword(dev,E752X_BUF_NERR,stat8); + stat8 &= 0x0f; + membuf_error(stat8); + } + + + + +// pci_read_config_word(pvt->bridge_ck,E752X_DRAM_FERR,&error_one); +// pci_read_config_word(pvt->bridge_ck,E752X_DRAM_NERR,&error_next); + error_one = dram_ferr; + error_next = dram_nerr; + /* clear any error bits */ + if(error_one) { + pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, + error_one,error_one); + } + if(error_next) { + pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, + error_next, error_next); + } + + /* decode and report errors */ + if(error_one & 0x0101) { /* check first error correctable */ + process_ce(mci,error_one,celog1_add,celog1_syndrome); + } + if(error_next & 0x0101) { /* check next error correctable */ + process_ce(mci,error_next,celog2_add,celog2_syndrome); + } + if(error_one & 0x4040) { + process_ue_no_info_wr(mci); + } + if(error_next & 0x4040) { + process_ue_no_info_wr(mci); + } + if(error_one & 0x2020) { + process_ded_retry(mci,error_one,retry_add); + } + if(error_next & 0x2020) { + process_ded_retry(mci,error_next,retry_add); + } + if(error_one & 0x0808) { + process_threshold_ce(mci,error_one); + } + if(error_next & 0x0808) { + process_threshold_ce(mci,error_next); + } + if(error_one & 0x0606) { + process_ue(mci,error_one,uelog_add,uelogs_add); + } + if(error_next & 0x0606) { + 
process_ue(mci,error_next,uelog_add,uelogs_add); + } + + + } + /* Test for PCI Parity errors in the southbridge */ + if (pvt->dev_d0f0 != NULL) { + dev = pvt->dev_d0f0; + for(pres_dev = dev; + ((struct pci_dev*)pres_dev->global_list.next != dev); + pres_dev = (struct pci_dev*)pres_dev->global_list.next) { + pci_read_config_dword(pres_dev,PCI_COMMAND,&stat32); + stat = (u16)(stat32 >>16); + /* test for error any error bits */ + if(stat32 & ((1<<6)+(1<<8))) { /* error reporting dev */ + if(stat & ((1<<15)+(1<<14)+(1<<8))) { + pci_write_config_word(pres_dev,6,stat); + if(stat & (1<<14)) { + printk( KERN_WARNING + "System Error on %s %s\n", + pres_dev->slot_name, + pci_pretty_name(pres_dev)); + } + if(stat & ((1<<15)+(1<<8))) { + printk( KERN_WARNING + "Parity Error on %s %s\n", + pres_dev->slot_name, + pci_pretty_name(pres_dev)); + } + } + } + } + } +} + + +static int e752x_probe1( struct pci_dev *pdev, int dev_idx ) +{ + int rc = -ENODEV; + int index; + u16 pci_data, stat; + u32 stat32; + u16 stat16; + u8 stat8; + struct mem_ctl_info *mci = NULL; + struct e752x_pvt *pvt = NULL; + u16 ddrcsr; + u32 drc; + int drc_chan; /* Number of channels 0=1chan,1=2chan */ + int drc_drbg; /* DRB granularity 0=32mb,1=64mb */ + int drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ + u32 dra; + unsigned long last_cumul_size; + struct pci_dev *pres_dev; + struct pci_dev *dev; + + debugf0( "MC: " __FILE__ ": %s(): mci\n", __func__ ); + printk( KERN_ERR "Starting Probe1\n" ); + + /* enable device 0 function 1 */ + pci_read_config_byte(pdev, E752X_DEVPRES1, &stat8); + stat8 |= (1<<5); + pci_write_config_byte(pdev, E752X_DEVPRES1, stat8); + + /* need to find out the number of channels */ + pci_read_config_dword(pdev, E752X_DRC, &drc); + pci_read_config_word(pdev, E752X_DDRCSR, &ddrcsr); + if(((ddrcsr>>12)&3)==3) + drc_chan = 1; /* Dual channel */ + else + drc_chan = 0; /* Single channel */ + drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ + drc_ddim = ( drc >> 20 ) & 
0x3; + + mci = bluesmoke_mc_init_structs(sizeof(*pvt), + E752X_NR_CSROWS, + drc_chan + 1); + + if ( ! mci ) { + rc = -ENOMEM; + goto FAIL_FINISHED; + } + + debugf3( "MC: " __FILE__ ": %s(): init mci\n", __func__ ); + + mci->mtype_cap = MEM_FLAG_RDDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED; + /* FIXME - what if different memory types are in different csrows? */ + mci->mod_name = BS_MOD_STR; + mci->mod_ver = "$Revision: 1.5 $"; + mci->pdev = pdev; + + debugf3( "MC: " __FILE__ ": %s(): init pvt\n", __func__ ); + pvt = (struct e752x_pvt *)mci->pvt_info; + pvt->dev_info = &e752x_devs[dev_idx]; + pvt->bridge_ck = pci_find_device( PCI_VENDOR_ID_INTEL, + pvt->dev_info->err_dev, + pvt->bridge_ck ); + if ( ! pvt->bridge_ck ) { + pvt->bridge_ck = pci_scan_single_device(pdev->bus, PCI_DEVFN(0,1)); + } + if ( ! pvt->bridge_ck ) { + printk( KERN_ERR + "MC: error reporting device not found:" + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev ); + goto FAIL_FINISHED; + } + if(ddrcsr & 0x10) { + pvt->mc_symmetric = 1; + } else { + pvt->mc_symmetric =0; + } + + debugf3( "MC: " __FILE__ ": %s(): more mci init\n", __func__ ); + mci->ctl_name = pvt->dev_info->ctl_name; + + mci->edac_check = e752x_check; + /* FIXME - why isn't clear_err set to something? */ + mci->clear_err = NULL; + mci->ctl_page_to_phys = ctl_page_to_phys; + + /* find out the device types */ + pci_read_config_dword(pdev, E752X_DRA, &dra); + + /* + * The dram row boundary (DRB) reg values are boundary address + * for each DRAM row with a granularity of 64 or 128MB (single/dual + * channel operation). DRB regs are cumulative; therefore DRB7 will + * contain the total memory contained in all eight rows. 
+ */ + for( last_cumul_size = index = 0; index < mci->nr_csrows; index++ ) { + u8 value; + u32 cumul_size; + /* mem_dev 0=x8, 1=x4 */ + int mem_dev = ( dra >> ( index * 4 + 2 ) ) & 0x3; + struct csrow_info *csrow = &mci->csrows[ index ]; + + if(mem_dev == 2) + mem_dev = 1; + else + mem_dev = 0; + pci_read_config_byte(mci->pdev, E752X_DRB + index, &value); + /* convert a 128 or 64 MiB DRB to a page size. */ + cumul_size = value << (25 + drc_drbg - PAGE_SHIFT ); + debugf3( "MC: " __FILE__ ": %s(): (%d) cumul_size 0x%x\n", + __func__, index, cumul_size ); + if ( cumul_size == last_cumul_size ) { + continue; /* not populated */ + } + + csrow->first_page = last_cumul_size; + csrow->last_page = cumul_size - 1; + csrow->nr_pages = cumul_size - last_cumul_size; + last_cumul_size = cumul_size; + csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */ + csrow->mtype = MEM_RDDR; /* only one type supported */ + csrow->dtype = mem_dev ? DEV_X4 : DEV_X8; + + /* + * if single channel or x8 devices then SECDED + * if dual channel and x4 then S4ECD4ED + */ + if ( drc_ddim ) { + if ( drc_chan && mem_dev ) { + csrow->edac_mode = EDAC_S4ECD4ED; + mci->edac_cap |= EDAC_FLAG_S4ECD4ED; + } else { + csrow->edac_mode = EDAC_SECDED; + mci->edac_cap |= EDAC_FLAG_SECDED; + } + } else { + csrow->edac_mode = EDAC_NONE; + } + } + + /* Fill in the memory map table */ + { + u8 value; + u8 last=0; + u8 row=0; + for(index=0;index<8;index+=2) { + + pci_read_config_byte(mci->pdev, E752X_DRB + index, &value); + /* test if there is a dimm in this slot */ + if(value == last) { + /* no dimm in the slot, so flag it as empty */ + pvt->map[index]=0xff; + pvt->map[index+1]=0xff; + } + else { /* there is a dimm in the slot */ + pvt->map[index]=row; + row++; + last = value; + /* test the next value to see if the dimm is double sided */ + pci_read_config_byte(mci->pdev, E752X_DRB + index + 1, &value); + if(value == last) { + /* the dimm is single sided, so flag as empty */ + pvt->map[index+1]=0xff; + row++; + } 
+ else { + /* this is a double sided dimm to save the next row # */ + pvt->map[index+1]=row; + row++; + } + last = value; + } + } + } + + /* set the map type. 1 = normal, 0 = reversed */ + pci_read_config_byte(mci->pdev, E752X_DRM, &stat8); + if((stat8&0x0f) > ((stat8>>4)&0x0f)) { + /* map type is normal */ + pvt->map_type = 1; + } + else { + /* map type is reversed */ + pvt->map_type = 0; + } + + mci->edac_cap |= EDAC_FLAG_NONE; + + debugf3( "MC: " __FILE__ ": %s(): tolm, remapbase, remaplimit\n", __func__ ); + /* load the top of low memory, remap base, and remap limit vars */ + pci_read_config_word(mci->pdev, E752X_TOLM, &pci_data); + pvt->tolm = ((u32)pci_data) << 4; + pci_read_config_word(mci->pdev, E752X_REMAPBASE, &pci_data); + pvt->remapbase = ((u32)pci_data) << 14; + pci_read_config_word(mci->pdev, E752X_REMAPLIMIT, &pci_data); + pvt->remaplimit = ((u32)pci_data) << 14; + printk( "tolm = %x, remapbase = %x, remaplimit = %x\n", + pvt->tolm, pvt->remapbase, pvt->remaplimit); + + if ( 0 != bluesmoke_mc_add_mc( mci ) ) { + debugf3( "MC: " __FILE__ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); + goto FAIL_FINISHED; + } + + /* Walk through the PCI table and clear errors */ + dev = pci_find_device( PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_7520_0, NULL ); + pvt->dev_d0f0 = dev; + for(pres_dev = dev; + ((struct pci_dev*)pres_dev->global_list.next != dev); + pres_dev = (struct pci_dev*)pres_dev->global_list.next) { + pci_read_config_dword(pres_dev,PCI_COMMAND,&stat32); + stat = (u16)(stat32 >>16); + /* clear any error bits */ + if(stat32 & ((1<<6)+(1<<8))) { + pci_write_config_word(pres_dev,PCI_STATUS,stat); + } + } + /* find the error reporting device and clear errors */ + dev = pvt->dev_d0f1 = pvt->bridge_ck; + /* Turn off error disable & SMI in case the BIOS turned it on */ + pci_write_config_byte(dev,E752X_HI_ERRMASK,0x00); + pci_write_config_byte(dev,E752X_HI_SMICMD,0x00); + pci_write_config_word(dev,E752X_SYSBUS_ERRMASK,0x00); + 
pci_write_config_word(dev,E752X_SYSBUS_SMICMD,0x00); + pci_write_config_byte(dev,E752X_BUF_ERRMASK,0x00); + pci_write_config_byte(dev,E752X_BUF_SMICMD,0x00); + pci_write_config_byte(dev,E752X_DRAM_ERRMASK,0x00); + pci_write_config_byte(dev,E752X_DRAM_SMICMD,0x00); + /* clear other MCH errors */ + pci_read_config_dword(dev,E752X_FERR_GLOBAL,&stat32); + pci_write_config_dword(dev,E752X_FERR_GLOBAL,stat32); + pci_read_config_dword(dev,E752X_NERR_GLOBAL,&stat32); + pci_write_config_dword(dev,E752X_NERR_GLOBAL,stat32); + pci_read_config_byte(dev,E752X_HI_FERR,&stat8); + pci_write_config_byte(dev,E752X_HI_FERR,stat8); + pci_read_config_byte(dev,E752X_HI_NERR,&stat8); + pci_write_config_byte(dev,E752X_HI_NERR,stat8); + pci_read_config_dword(dev,E752X_SYSBUS_FERR,&stat32); + pci_write_config_dword(dev,E752X_SYSBUS_FERR,stat32); + pci_read_config_byte(dev,E752X_BUF_FERR,&stat8); + pci_write_config_byte(dev,E752X_BUF_FERR,stat8); + pci_read_config_byte(dev,E752X_BUF_NERR,&stat8); + pci_write_config_byte(dev,E752X_BUF_NERR,stat8); + pci_read_config_word(dev, E752X_DRAM_FERR, &stat16); + pci_write_config_word(dev, E752X_DRAM_FERR, stat16); + pci_read_config_word(dev, E752X_DRAM_NERR, &stat16); + pci_write_config_word(dev, E752X_DRAM_NERR, stat16); + + /* get this far and it's successful */ + debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); + rc = 0; + goto FINISHED; + + FAIL_FINISHED: + if ( mci ) { + kfree( mci ); + } + FINISHED: + return( rc ); +} + + +#ifdef CONFIG_PM + +static int e752x_suspend (struct pci_dev *pdev, u32 state) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + + +static int e752x_resume (struct pci_dev *pdev) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + +#endif /* CONFIG_PM */ + + +/* returns count (>= 0), or negative on error */ +static int __devinit e752x_init_one( struct pci_dev *pdev, + const struct pci_device_id *ent ) +{ + int rc; + + debugf0( "MC: " __FILE__ ": 
%s()\n", __func__ ); + + /* wake up and enable device */ + if (pci_enable_device (pdev)) { + rc = -EIO; + } else { + rc = e752x_probe1( pdev, ent->driver_data ); + } + return rc; +} + +/* PCI remove hook: unregister the mci found for pdev and free it; frees nothing if lookup or unregistration fails */ +static void __devexit e752x_remove_one( struct pci_dev *pdev ) +{ + struct mem_ctl_info *mci; + + debugf0( __FILE__ ": %s()\n", __func__); + + if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { + goto FINISHED; + } + + if ( 0 != bluesmoke_mc_del_mc( mci ) ) { + goto FINISHED; + } + + kfree( mci ); + + FINISHED: + return; +} + +/* PCI IDs claimed by this driver; the trailing E7520 value reaches e752x_probe1() through ent->driver_data */ +static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { + { PCI_VEND_DEV( INTEL, 7520_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520 }, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, e752x_pci_tbl); + +/* PCI driver hooks, written with C99 designated initializers (the GNU "field:" form is obsolete) */ +static struct pci_driver e752x_driver = { + .name = BS_MOD_STR, + .probe = e752x_init_one, + .remove = __devexit_p(e752x_remove_one), + .id_table = e752x_pci_tbl, +#ifdef CONFIG_PM + .suspend = e752x_suspend, + .resume = e752x_resume, +#endif /* CONFIG_PM */ +}; + +/* Module init: register the PCI driver; returns 0 or the negative pci_module_init() result */ +int __init e752x_init(void) +{ + int pci_rc; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_rc = pci_module_init( &e752x_driver ); + if ( pci_rc < 0 ) return pci_rc; + + return 0; +} + +/* Module exit: unregister the PCI driver */ +static void __exit e752x_exit(void) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_unregister_driver( &e752x_driver ); +} + + +module_init(e752x_init); +module_exit(e752x_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n"); +MODULE_DESCRIPTION("MC support for Intel e752x memory controllers"); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e7xxx.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_e7xxx.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e7xxx.c 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,552 @@ +/* + 
* Intel e7xxx Memory Controller kernel module + * (C) 2003 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * See "enum e7xxx_chips" below for supported chipsets + * + * Written by Thayne Harbaugh + * Based on work by Dan Hollis and others. + * http://www.anime.net/~goemon/linux-ecc/ + * + * Contributors: + * Eric Biederman (Linux Networx) + * Tom Zimmerman (Linux Networx) + * Jim Garlic (Lawrence Livermore National Labs) + * Dave Peterson (Lawrence Livermore National Labs) + * That One Guy (Some other place) + * + * $Id: bluesmoke_e7xxx.c,v 1.5 2004/11/18 22:19:46 thayne Exp $ + * + */ + + +#include +#include +#include + +#include +#include + +#include + +#include "bluesmoke_mc.h" + + +#ifndef PCI_DEVICE_ID_INTEL_7205_0 +#define PCI_DEVICE_ID_INTEL_7205_0 0x255d +#endif /* PCI_DEVICE_ID_INTEL_7205_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_7205_1_ERR +#define PCI_DEVICE_ID_INTEL_7205_1_ERR 0x2551 +#endif /* PCI_DEVICE_ID_INTEL_7205_1_ERR */ + +#ifndef PCI_DEVICE_ID_INTEL_7500_0 +#define PCI_DEVICE_ID_INTEL_7500_0 0x2540 +#endif /* PCI_DEVICE_ID_INTEL_7500_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_7500_1_ERR +#define PCI_DEVICE_ID_INTEL_7500_1_ERR 0x2541 +#endif /* PCI_DEVICE_ID_INTEL_7500_1_ERR */ + +#ifndef PCI_DEVICE_ID_INTEL_7501_0 +#define PCI_DEVICE_ID_INTEL_7501_0 0x254c +#endif /* PCI_DEVICE_ID_INTEL_7501_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_7501_1_ERR +#define PCI_DEVICE_ID_INTEL_7501_1_ERR 0x2541 +#endif /* PCI_DEVICE_ID_INTEL_7501_1_ERR */ + +#ifndef PCI_DEVICE_ID_INTEL_7505_0 +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#endif /* PCI_DEVICE_ID_INTEL_7505_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_7505_1_ERR +#define PCI_DEVICE_ID_INTEL_7505_1_ERR 0x2551 +#endif /* PCI_DEVICE_ID_INTEL_7505_1_ERR */ + + +#define E7XXX_NR_CSROWS 8 /* number of csrows */ +#define E7XXX_NR_DIMMS 8 /* FIXME - is this correct? 
*/ + + +/* E7XXX register addresses - device 0 function 0 */ +#define E7XXX_DRB 0x60 /* DRAM row boundary register (8b) */ +#define E7XXX_DRA 0x70 /* DRAM row attribute register (8b) */ + /* + * 31 Device width row 7 0=x8 1=x4 + * 27 Device width row 6 + * 23 Device width row 5 + * 19 Device width row 4 + * 15 Device width row 3 + * 11 Device width row 2 + * 7 Device width row 1 + * 3 Device width row 0 + */ +#define E7XXX_DRC 0x7C /* DRAM controller mode reg (32b) */ + /* + * 22 Number channels 0=1,1=2 + * 19:18 DRB Granularity 32/64MB + */ +#define E7XXX_TOLM 0xC4 /* DRAM top of low memory reg (16b) */ +#define E7XXX_REMAPBASE 0xC6 /* DRAM remap base address reg (16b) */ +#define E7XXX_REMAPLIMIT 0xC8 /* DRAM remap limit address reg (16b) */ + +/* E7XXX register addresses - device 0 function 1 */ +#define E7XXX_DRAM_FERR 0x80 /* DRAM first error register (8b) */ +#define E7XXX_DRAM_NERR 0x82 /* DRAM next error register (8b) */ +#define E7XXX_DRAM_CELOG_ADD 0xA0 /* DRAM first correctable memory */ + /* error address register (32b) */ + /* + * 31:28 Reserved + * 27:6 CE address (4k block 33:12) + * 5:0 Reserved + */ +#define E7XXX_DRAM_UELOG_ADD 0xB0 /* DRAM first uncorrectable memory */ + /* error address register (32b) */ + /* + * 31:28 Reserved + * 27:6 CE address (4k block 33:12) + * 5:0 Reserved + */ +#define E7XXX_DRAM_CELOG_SYNDROME 0xD0 /* DRAM first correctable memory */ + /* error syndrome register (16b) */ + +enum e7xxx_chips { + E7500 = 0, + E7501, + E7505, + E7205, +}; + + +struct e7xxx_pvt { + struct pci_dev *bridge_ck; + u32 tolm; + u32 remapbase; + u32 remaplimit; + const struct e7xxx_dev_info *dev_info; +}; + + +struct e7xxx_dev_info { + u16 err_dev; + const char *ctl_name; +}; + + +static const struct e7xxx_dev_info e7xxx_devs[] = { + [E7500] = { + .err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR, + .ctl_name = "E7500" + }, + [E7501] = { + .err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR, + .ctl_name = "E7501" + }, + [E7505] = { + .err_dev = 
PCI_DEVICE_ID_INTEL_7505_1_ERR, + .ctl_name = "E7505" + }, + [E7205] = { + .err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR, + .ctl_name = "E7205" + }, +}; + +/* Pick channel 0 or 1 for a CE according to which byte/nibble of the 16-bit syndrome is zero */ +/* FIXME - is this valid for both SECDED and S4ECD4ED? */ +static inline int e7xxx_find_channel(u16 syndrome) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if((syndrome & 0xff00)==0) /* high byte clear */ + return(0); + if((syndrome & 0x00ff)==0) /* low byte clear */ + return(1); + if((syndrome & 0xf000)==0) /* top nibble clear */ + return(0); + if((syndrome & 0x0f00)==0) /* second nibble clear */ + return(0); + return(1); +} + +/* Translate a controller page number using tolm/remapbase/remaplimit; logs an error and returns tolm-1 when the page is out of range */ +static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, + unsigned long page) +{ + u32 remap; + struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + if(page < pvt->tolm) /* below top of low memory: returned unchanged */ + return(page); + if((page >= 0x100000)&&(page < pvt->remapbase)) /* between 0x100000 and the remap base: returned unchanged */ + return(page); + remap = (page - pvt->tolm) + pvt->remapbase; + if(remap < pvt->remaplimit) + return(remap); + printk(KERN_ERR "Invalid page %lx - out of range\n", page); + return(pvt->tolm-1); +} + +/* Read the logged CE address and syndrome from the error reporting device (bridge_ck) and report the CE */ +static void process_ce(struct mem_ctl_info *mci) +{ + u32 error_1b, page; + u16 syndrome; + int row; + int channel; + struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + /* read the error address */ + pci_read_config_dword(pvt->bridge_ck,E7XXX_DRAM_CELOG_ADD, + &error_1b); + /* FIXME - should use PAGE_SHIFT */ + page = error_1b >>6; /* convert the address to 4k page */ + /* read the syndrome */ + pci_read_config_word(pvt->bridge_ck,E7XXX_DRAM_CELOG_SYNDROME, + &syndrome); + /* FIXME - check for -1 */ + row = bluesmoke_mc_find_csrow_by_page( mci, page ); + channel = e7xxx_find_channel(syndrome); /* convert syndrome to channel */ + bluesmoke_mc_handle_ce( mci, page, 0, syndrome, + row, channel, "e7xxx CE" ); +} + +/* Report a CE without reading any address or syndrome registers */ +static void process_ce_no_info(struct mem_ctl_info *mci) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + bluesmoke_mc_handle_ce_no_info( mci, "e7xxx CE log register overflow" ); +} + 
+ +static void process_ue(struct mem_ctl_info *mci) +{ + u32 error_2b, block_page; + int row; + struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + /* read the error address */ + pci_read_config_dword( pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD, + &error_2b ); + /* FIXME - should use PAGE_SHIFT */ + block_page = error_2b >>6; /* convert to 4k address */ + row = bluesmoke_mc_find_csrow_by_page( mci, block_page ); + bluesmoke_mc_handle_ue( mci, block_page, 0, row, "e7xxx UE" ); +} + + +static void process_ue_no_info(struct mem_ctl_info *mci) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + bluesmoke_mc_handle_ue_no_info( mci, "e7xxx UE log register overflow" ); +} + + +static void e7xxx_check(struct mem_ctl_info *mci) +{ + u8 error_one, error_next; + struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + + pci_read_config_byte(pvt->bridge_ck,E7XXX_DRAM_FERR,&error_one); + pci_read_config_byte(pvt->bridge_ck,E7XXX_DRAM_NERR,&error_next); + + /* clear any error bits */ + if(error_one & 3) { + pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03); + } + if(error_next & 3) { + pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03); + } + + /* decode and report errors */ + if(error_one & 1) { /* check first error correctable */ + process_ce(mci); + if(error_next & 1) { /* check next error correctable */ + process_ce_no_info(mci); + } + if(error_next & 2) { /* check next error uncorrectable */ + process_ue(mci); + } + } else if(error_one & 2) { /* check first error uncorrectable */ + process_ue(mci); + if(error_next & 1) { /* check next error correctable */ + process_ce(mci); + } + if(error_next & 2) { /* check next error uncorrectable */ + process_ue_no_info(mci); + } + } +} + + +static int e7xxx_probe1( struct pci_dev *pdev, int dev_idx ) +{ + int rc = -ENODEV; + int index; + u16 pci_data; + struct mem_ctl_info *mci = NULL; + struct 
e7xxx_pvt *pvt = NULL; + u32 drc; + int drc_chan; /* Number of channels 0=1chan,1=2chan */ + int drc_drbg; /* DRB granularity 0=32mb,1=64mb */ + int drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ + u32 dra; + unsigned long last_cumul_size; + + + debugf0( "MC: " __FILE__ ": %s(): mci\n", __func__ ); + + /* need to find out the number of channels */ + pci_read_config_dword(pdev, E7XXX_DRC, &drc); + drc_chan = ( ( drc >> 22 ) & 0x1 ); + drc_drbg = ( drc >> 18 ) & 0x3; + drc_ddim = ( drc >> 20 ) & 0x3; + + mci = bluesmoke_mc_init_structs(sizeof(*pvt), + E7XXX_NR_CSROWS, + drc_chan + 1); + + if ( ! mci ) { + rc = -ENOMEM; + goto FAIL_FINISHED; + } + + debugf3( "MC: " __FILE__ ": %s(): init mci\n", __func__ ); + + mci->mtype_cap = MEM_FLAG_RDDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED; + /* FIXME - what if different memory types are in different csrows? */ + mci->mod_name = BS_MOD_STR; + mci->mod_ver = "$Revision: 1.5 $"; + mci->pdev = pdev; + + debugf3( "MC: " __FILE__ ": %s(): init pvt\n", __func__ ); + pvt = (struct e7xxx_pvt *)mci->pvt_info; + pvt->dev_info = &e7xxx_devs[dev_idx]; + pvt->bridge_ck = pci_find_device( PCI_VENDOR_ID_INTEL, + pvt->dev_info->err_dev, + pvt->bridge_ck ); + if ( ! pvt->bridge_ck ) { + printk( KERN_ERR + "MC: error reporting device not found:" + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev ); + goto FAIL_FINISHED; + } + + debugf3( "MC: " __FILE__ ": %s(): more mci init\n", __func__ ); + mci->ctl_name = pvt->dev_info->ctl_name; + + mci->edac_check = e7xxx_check; + /* FIXME - why isn't clear_err set to something? */ + mci->clear_err = NULL; + mci->ctl_page_to_phys = ctl_page_to_phys; + + /* find out the device types */ + pci_read_config_dword(pdev, E7XXX_DRA, &dra); + + /* + * The dram row boundary (DRB) reg values are boundary address + * for each DRAM row with a granularity of 32 or 64MB (single/dual + * channel operation). 
DRB regs are cumulative; therefore DRB7 will + * contain the total memory contained in all eight rows. + */ + for( last_cumul_size = index = 0; index < mci->nr_csrows; index++ ) { + u8 value; + u32 cumul_size; + /* mem_dev 0=x8, 1=x4 */ + int mem_dev = ( dra >> ( index * 4 + 3 ) ) & 0x1; + struct csrow_info *csrow = &mci->csrows[ index ]; + + pci_read_config_byte(mci->pdev, E7XXX_DRB + index, &value); + /* convert a 64 or 32 MiB DRB to a page size. */ + cumul_size = value << (25 + drc_drbg - PAGE_SHIFT ); + debugf3( "MC: " __FILE__ ": %s(): (%d) cumul_size 0x%x\n", + __func__, index, cumul_size ); + if ( cumul_size == last_cumul_size ) { + continue; /* not populated */ + } + + csrow->first_page = last_cumul_size; + csrow->last_page = cumul_size - 1; + csrow->nr_pages = cumul_size - last_cumul_size; + last_cumul_size = cumul_size; + csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */ + csrow->mtype = MEM_RDDR; /* only one type supported */ + csrow->dtype = mem_dev ? DEV_X4 : DEV_X8; + + /* + * if single channel or x8 devices then SECDED + * if dual channel and x4 then S4ECD4ED + */ + if ( drc_ddim ) { + if ( drc_chan && mem_dev ) { + csrow->edac_mode = EDAC_S4ECD4ED; + mci->edac_cap |= EDAC_FLAG_S4ECD4ED; + } else { + csrow->edac_mode = EDAC_SECDED; + mci->edac_cap |= EDAC_FLAG_SECDED; + } + } else { + csrow->edac_mode = EDAC_NONE; + } + } + + mci->edac_cap |= EDAC_FLAG_NONE; + + debugf3( "MC: " __FILE__ ": %s(): tolm, remapbase, remaplimit\n", __func__ ); + /* load the top of low memory, remap base, and remap limit vars */ + pci_read_config_word(mci->pdev, E7XXX_TOLM, &pci_data); + pvt->tolm = ((u32)pci_data) << 4; + pci_read_config_word(mci->pdev, E7XXX_REMAPBASE, &pci_data); + pvt->remapbase = ((u32)pci_data) << 14; + pci_read_config_word(mci->pdev, E7XXX_REMAPLIMIT, &pci_data); + pvt->remaplimit = ((u32)pci_data) << 14; + printk( "tolm = %x, remapbase = %x, remaplimit = %x\n", + pvt->tolm, pvt->remapbase, pvt->remaplimit); + + /* clear any pending errors, 
or initial state bits */ + pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03); + pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03); + + if ( 0 != bluesmoke_mc_add_mc( mci ) ) { + debugf3( "MC: " __FILE__ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); + goto FAIL_FINISHED; + } + + /* get this far and it's successful */ + debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); + rc = 0; + goto FINISHED; + + FAIL_FINISHED: + if ( mci ) { + kfree( mci ); + } + FINISHED: + return( rc ); +} + + +#ifdef CONFIG_PM + +static int e7xxx_suspend (struct pci_dev *pdev, u32 state) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + + +static int e7xxx_resume (struct pci_dev *pdev) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + +#endif /* CONFIG_PM */ + + +/* returns count (>= 0), or negative on error */ +static int __devinit e7xxx_init_one( struct pci_dev *pdev, + const struct pci_device_id *ent ) +{ + int rc; + + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + /* wake up and enable device */ + if (pci_enable_device (pdev)) { + rc = -EIO; + } else { + rc = e7xxx_probe1( pdev, ent->driver_data ); + } + return rc; +} + + +static void __devexit e7xxx_remove_one( struct pci_dev *pdev ) +{ + struct mem_ctl_info *mci; + + debugf0( __FILE__ ": %s()\n", __func__); + + if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { + goto FINISHED; + } + + if ( 0 != bluesmoke_mc_del_mc( mci ) ) { + goto FINISHED; + } + + kfree( mci ); + + FINISHED: + return; +} + + +static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { + { PCI_VEND_DEV( INTEL, 7205_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205 }, + { PCI_VEND_DEV( INTEL, 7500_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7500 }, + { PCI_VEND_DEV( INTEL, 7501_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7501 }, + { PCI_VEND_DEV( INTEL, 7505_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7505 }, + {0,} /* 0 terminated list. 
*/ +}; + +MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl); + + +static struct pci_driver e7xxx_driver = { + .name = BS_MOD_STR, + .probe = e7xxx_init_one, + .remove = __devexit_p(e7xxx_remove_one), + .id_table = e7xxx_pci_tbl, +#ifdef CONFIG_PM + .suspend = e7xxx_suspend, + .resume = e7xxx_resume, +#endif /* CONFIG_PM */ +}; + + +int __init e7xxx_init(void) +{ + int pci_rc; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_rc = pci_module_init( &e7xxx_driver ); + if ( pci_rc < 0 ) return pci_rc; + + return 0; +} + + +static void __exit e7xxx_exit(void) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_unregister_driver( &e7xxx_driver ); +} + + +module_init(e7xxx_init); +module_exit(e7xxx_exit); + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" + "Based on.work by Dan Hollis et al"); +MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_i82875p.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_i82875p.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_i82875p.c 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,510 @@ +/* + * Intel 82875p Memory Controller kernel module + * (C) 2003 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. 
+ * + * Written by Thayne Harbaugh + * + * $Id: bluesmoke_i82875p.c,v 1.5 2004/11/18 22:19:46 thayne Exp $ + * + */ + + +#include +#include +#include + +#include +#include + +#include + +#include "bluesmoke_mc.h" + + +#ifndef PCI_DEVICE_ID_INTEL_82875_0 +#define PCI_DEVICE_ID_INTEL_82875_0 0x2578 +#endif /* PCI_DEVICE_ID_INTEL_82875_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_82875_6 +#define PCI_DEVICE_ID_INTEL_82875_6 0x257e +#endif /* PCI_DEVICE_ID_INTEL_82875_6 */ + + +/* four csrows in dual channel, eight in single channel */ +#define I82875P_NR_CSROWS(nr_chans) (8/(nr_chans)) + + +/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */ +#define I82875P_EAP 0x58 /* Error Address Pointer (32b) + * + * 31:12 block address + * 11:0 reserved + */ + +#define I82875P_DERRSYN 0x5c /* DRAM Error Syndrome (8b) + * + * 7:0 DRAM ECC Syndrome + */ + +#define I82875P_DES 0x5d /* DRAM Error Status (8b) + * + * 7:1 reserved + * 0 Error channel 0/1 + */ + +#define I82875P_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15:10 reserved + * 9 non-DRAM lock error (ndlock) + * 8 Sftwr Generated SMI + * 7 ECC UE + * 6 reserved + * 5 MCH detects unimplemented cycle + * 4 AGP access outside GA + * 3 Invalid AGP access + * 2 Invalid GA translation table + * 1 Unsupported AGP command + * 0 ECC CE + */ + +#define I82875P_ERRCMD 0xca /* Error Command (16b) + * + * 15:10 reserved + * 9 SERR on non-DRAM lock + * 8 SERR on ECC UE + * 7 SERR on ECC CE + * 6 target abort on high exception + * 5 detect unimplemented cyc + * 4 AGP access outside of GA + * 3 SERR on invalid AGP access + * 2 invalid translation table + * 1 SERR on unsupported AGP command + * 0 reserved + */ + + +/* Intel 82875p register addresses - device 6 function 0 - DRAM Controller */ +#define I82875P_PCICMD6 0x04 /* PCI Command Register (16b) + * + * 15:10 reserved + * 9 fast back-to-back - ro 0 + * 8 SERR enable - ro 0 + * 7 addr/data stepping - ro 0 + * 6 parity err enable - ro 0 + * 5 VGA palette snoop - 
ro 0 + * 4 mem wr & invalidate - ro 0 + * 3 special cycle - ro 0 + * 2 bus master - ro 0 + * 1 mem access dev6 - 0(dis),1(en) + * 0 IO access dev3 - 0(dis),1(en) + */ + +#define I82875P_BAR6 0x10 /* Mem Delays Base ADDR Reg (32b) + * + * 31:12 mem base addr [31:12] + * 11:4 address mask - ro 0 + * 3 prefetchable - ro 0(non),1(pre) + * 2:1 mem type - ro 0 + * 0 mem space - ro 0 + */ + +/* Intel 82875p MMIO register space - device 0 function 0 - MMR space */ + +#define I82875P_DRB_SHIFT 26 /* 64MiB grain */ +#define I82875P_DRB 0x00 /* DRAM Row Boundary (8b x 8) + * + * 7 reserved + * 6:0 64MiB row boundary addr + */ + +#define I82875P_DRA 0x10 /* DRAM Row Attribute (4b x 8) + * + * 7 reserved + * 6:4 row attr row 1 + * 3 reserved + * 2:0 row attr row 0 + * + * 000 = 4KiB + * 001 = 8KiB + * 010 = 16KiB + * 011 = 32KiB + */ + +#define I82875P_DRC 0x68 /* DRAM Controller Mode (32b) + * + * 31:30 reserved + * 29 init complete + * 28:23 reserved + * 22:21 nr chan 00=1,01=2 + * 20 reserved + * 19:18 Data Integ Mode 00=none,01=ecc + * 17:11 reserved + * 10:8 refresh mode + * 7 reserved + * 6:4 mode select + * 3:2 reserved + * 1:0 DRAM type 01=DDR + */ + +/* index into i82875p_devs[]; also the driver_data value of the PCI id table entry */ +enum i82875p_chips { + I82875P = 0, +}; + +/* per-controller private data: the overflow device (device 6) and its ioremapped window */ +struct i82875p_pvt { + struct pci_dev *ovrfl_pdev; + void *ovrfl_window; +}; + +/* static per-chip description; ctl_name becomes mci->ctl_name */ +struct i82875p_dev_info { + const char *ctl_name; +}; + +/* chip table indexed by enum i82875p_chips */ +static const struct i82875p_dev_info i82875p_devs[] = { + [I82875P] = { + .ctl_name = "i82875p" + }, +}; + +/* edac_check hook: sample ERRSTS/EAP/DES/DERRSYN, clear the ECC CE (bit 0) and UE (bit 7) status bits and report what was logged */ +static void i82875p_check(struct mem_ctl_info *mci) +{ + u8 des; + u8 derrsyn; + u16 errsts, errsts2; + u32 eap; + int row; + int multi_chan = mci->csrows[0].nr_channels - 1; + + debugf1( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); + + /* + * This is a mess because there is no atomic way to read all + * the registers at once and the registers can transition + * from CE being overwritten by UE. 
+ */ + pci_read_config_word( mci->pdev, I82875P_ERRSTS, &errsts ); + pci_read_config_dword( mci->pdev, I82875P_EAP, &eap ); + pci_read_config_byte( mci->pdev, I82875P_DES, &des ); + pci_read_config_byte( mci->pdev, I82875P_DERRSYN, &derrsyn ); + pci_read_config_word( mci->pdev, I82875P_ERRSTS, &errsts2 ); + + pci_write_bits16( mci->pdev, I82875P_ERRSTS, 0x0081, 0x0081 ); + + /* + * If the error is the same then we can for both reads then + * the first set of reads is valid. If there is a change then + * there is a CE no info and the second set of reads is valid + * and should be UE info. + */ + if (! (errsts2 & 0x0081) ) return; + if ( (errsts ^ errsts2) & 0x0081 ) { + bluesmoke_mc_handle_ce_no_info( mci, "UE overwrote CE" ); + errsts = errsts2; + pci_read_config_dword( mci->pdev, I82875P_EAP, &eap ); + pci_read_config_byte( mci->pdev, I82875P_DES, &des ); + pci_read_config_byte( mci->pdev, I82875P_DERRSYN, &derrsyn ); + } + + eap >>= PAGE_SHIFT; + row = bluesmoke_mc_find_csrow_by_page( mci, eap ); + + if ( errsts & 0x0080 ) { + bluesmoke_mc_handle_ue( mci, eap, 0, row, "i82875p UE" ); + } else { + bluesmoke_mc_handle_ce( mci, eap, 0, derrsyn, row, + multi_chan ? (des & 0x1) : 0, + "i82875p UE" ); + } + + return; +} + + +static int i82875p_probe1( struct pci_dev *pdev, int dev_idx ) +{ + int rc = -ENODEV; + int index; + struct mem_ctl_info *mci = NULL; + struct i82875p_pvt *pvt = NULL; + unsigned long last_cumul_size; + struct pci_dev *ovrfl_pdev; + void *ovrfl_window = NULL; + + u32 drc; + u32 drc_chan; /* Number of channels 0=1chan,1=2chan */ + u32 nr_chans; + u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ + + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + ovrfl_pdev = pci_find_device( PCI_VEND_DEV( INTEL, 82875_6 ), NULL ); + + if ( ! ovrfl_pdev ) { + /* + * Intel tells BIOS developers to hide device 6 which + * configures the overflow device access containing + * the DRBs - this is where we expose device 6. 
+ * http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm + */ + pci_write_bits8( pdev, 0xf4, 0x2, 0x2 ); + ovrfl_pdev = pci_scan_single_device( pdev->bus, PCI_DEVFN( 6, 0 ) ); + if ( ! ovrfl_pdev ) { + goto FAIL_FINISHED; + } + } + +#ifdef CONFIG_PROC_FS + if ( !ovrfl_pdev->procent && pci_proc_attach_device(ovrfl_pdev)) { + printk( KERN_ERR "MC: " __FILE__ + ": %s(): Failed to attach overflow device\n", + __func__ ); + goto FAIL_FINISHED; + } +#endif /* CONFIG_PROC_FS */ + if (pci_enable_device(ovrfl_pdev)) { + printk( KERN_ERR "MC: " __FILE__ + ": %s(): Failed to enable overflow device\n", + __func__ ); + goto FAIL_FINISHED; + } + if (pci_request_regions(ovrfl_pdev, pci_name(ovrfl_pdev))) { + printk( KERN_ERR "MC: " __FILE__ + ": %s(): Failed to reserve regions - broken BIOS?\n", + __func__ ); +#ifdef CORRECT_BIOS + goto FAIL_FINISHED; +#endif /* CORRECT_BIOS */ + } + + /* cache is irrelevant for PCI bus reads/writes */ + ovrfl_window = ioremap_nocache(pci_resource_start(ovrfl_pdev, 0), + pci_resource_len(ovrfl_pdev, 0)); + + if (!ovrfl_window) { + printk( KERN_ERR "MC: " __FILE__ + ": %s(): Failed to ioremap bar6\n", + __func__ ); + goto FAIL_FINISHED; + } + + /* need to find out the number of channels */ + drc = readl(ovrfl_window + I82875P_DRC); + drc_chan = ( ( drc >> 21 ) & 0x1 ); + nr_chans = drc_chan + 1; + drc_ddim = ( drc >> 18 ) & 0x1; + + mci = bluesmoke_mc_init_structs(sizeof(*pvt), + I82875P_NR_CSROWS(nr_chans), + nr_chans ); + + if ( ! 
mci ) { + rc = -ENOMEM; + goto FAIL_FINISHED; + } + + debugf3( "MC: " __FILE__ ": %s(): init mci\n", __func__ ); + + mci->pdev = pdev; + mci->mtype_cap = MEM_FLAG_RDDR; + + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_UNKNOWN; + /* adjust FLAGS */ + + mci->mod_name = BS_MOD_STR; + mci->mod_ver = "$Revision: 1.5 $"; + mci->ctl_name = i82875p_devs[dev_idx].ctl_name; + mci->edac_check = i82875p_check; + mci->clear_err = NULL; + mci->ctl_page_to_phys = NULL; + + debugf3( "MC: " __FILE__ ": %s(): init pvt\n", __func__ ); + + pvt = (struct i82875p_pvt *)mci->pvt_info; + pvt->ovrfl_pdev = ovrfl_pdev; + pvt->ovrfl_window = ovrfl_window; + + /* + * The dram row boundary (DRB) reg values are boundary address + * for each DRAM row; this code scales them by a fixed 64MiB grain + * (I82875P_DRB_SHIFT). DRB regs are cumulative, so each value is the + * end of its row and a row equal to the previous one is unpopulated. + */ + for( last_cumul_size = index = 0; index < mci->nr_csrows; index++ ) { + u8 value; + u32 cumul_size; + struct csrow_info *csrow = &mci->csrows[ index ]; + + value = readb(ovrfl_window + I82875P_DRB + index); + cumul_size = value << ( I82875P_DRB_SHIFT - PAGE_SHIFT ); /* DRB units -> pages */ + debugf3( "MC: " __FILE__ ": %s(): (%d) cumul_size 0x%x\n", + __func__, index, cumul_size ); + if ( cumul_size == last_cumul_size ) { + continue; /* not populated */ + } + + csrow->first_page = last_cumul_size; + csrow->last_page = cumul_size - 1; + csrow->nr_pages = cumul_size - last_cumul_size; + last_cumul_size = cumul_size; + csrow->grain = 1 << 12; /* I82875P_EAP has 4KiB resolution */ + csrow->mtype = MEM_DDR; /* NOTE(review): mtype_cap above advertises MEM_FLAG_RDDR - confirm which is right */ + csrow->dtype = DEV_UNKNOWN; + csrow->edac_mode = drc_ddim ? 
EDAC_SECDED : EDAC_NONE; + } + + /* clear counters */ + pci_write_bits16( mci->pdev, I82875P_ERRSTS, 0x0081, 0x0081 ); + + if ( 0 != bluesmoke_mc_add_mc( mci ) ) { + debugf3( "MC: " __FILE__ + ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); + goto FAIL_FINISHED; + } + + /* get this far and it's successful */ + debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); + rc = 0; + goto FINISHED; + + FAIL_FINISHED: + if ( mci ) { + kfree( mci ); + } + + if (ovrfl_window) { + iounmap(ovrfl_window); + } + + if (ovrfl_pdev) { + pci_release_regions( ovrfl_pdev ); + pci_disable_device( ovrfl_pdev ); + } + + FINISHED: + /* NOTE: the ovrfl proc entry and pci_dev are intentionally left */ + return( rc ); +} + + +#ifdef CONFIG_PM + +static int i82875p_suspend (struct pci_dev *pdev, u32 state) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + + +static int i82875p_resume (struct pci_dev *pdev) +{ + debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); + + return -ENOSYS; +} + +#endif /* CONFIG_PM */ + + +/* returns count (>= 0), or negative on error */ +static int __devinit i82875p_init_one( struct pci_dev *pdev, + const struct pci_device_id *ent ) +{ + int rc; + + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + if (pci_enable_device (pdev)) { + rc = -EIO; + } else { + rc = i82875p_probe1( pdev, ent->driver_data ); + } + return rc; +} + + +static void __devexit i82875p_remove_one( struct pci_dev *pdev ) +{ + struct mem_ctl_info *mci; + struct i82875p_pvt *pvt = NULL; + + debugf0( __FILE__ ": %s()\n", __func__); + + if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { + goto FINISHED; + } + + pvt = (struct i82875p_pvt *)mci->pvt_info; + if (pvt->ovrfl_window) { + iounmap(pvt->ovrfl_window); + } + + if (pvt->ovrfl_pdev) { + pci_release_regions( pvt->ovrfl_pdev ); + pci_disable_device( pvt->ovrfl_pdev ); + } + + if ( 0 != bluesmoke_mc_del_mc( mci ) ) { + goto FINISHED; + } + + kfree( mci ); + + FINISHED: + return; +} + + 
+static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { + { PCI_VEND_DEV( INTEL, 82875_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P }, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl); + + +static struct pci_driver i82875p_driver = { + .name = BS_MOD_STR, + .probe = i82875p_init_one, + .remove = __devexit_p(i82875p_remove_one), + .id_table = i82875p_pci_tbl, +#ifdef CONFIG_PM + .suspend = i82875p_suspend, + .resume = i82875p_resume, +#endif /* CONFIG_PM */ +}; + + +int __init i82875p_init(void) +{ + int pci_rc; + + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_rc = pci_module_init( &i82875p_driver ); + if ( pci_rc < 0 ) return pci_rc; + + return 0; +} + + +static void __exit i82875p_exit(void) +{ + debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); + pci_unregister_driver( &i82875p_driver ); +} + + +module_init(i82875p_init); +module_exit(i82875p_exit); + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); +MODULE_DESCRIPTION("MC support for Intel 82875 memory hub controllers"); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_k8.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_k8.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_k8.c 2004-12-17 12:46:23.000000000 -0500 @@ -0,0 +1,1252 @@ +/* + * AMD K8 class Memory Controller kernel module + * (C) 2003 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. 
+ * + * Written by Thayne Harbaugh + * + * $Id: bluesmoke_k8.c,v 1.6 2004/11/23 01:34:25 thayne Exp $ + * + */ + + +#include +#include +#include + +#include +#include + +#include + +#include "bluesmoke_mc.h" + + +#ifndef PCI_DEVICE_ID_AMD_OPT_0_HT +#define PCI_DEVICE_ID_AMD_OPT_0_HT 0x1100 +#endif /* PCI_DEVICE_ID_AMD_OPT_0_HT */ + +#ifndef PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP +#define PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP 0x1101 +#endif /* PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP */ + +#ifndef PCI_DEVICE_ID_AMD_OPT_2_MEMCTL +#define PCI_DEVICE_ID_AMD_OPT_2_MEMCTL 0x1102 +#endif /* PCI_DEVICE_ID_AMD_OPT_2_MEMCTL */ + +#ifndef PCI_DEVICE_ID_AMD_OPT_3_MISCCTL +#define PCI_DEVICE_ID_AMD_OPT_3_MISCCTL 0x1103 +#endif /* PCI_DEVICE_ID_AMD_OPT_3_MISCCTL */ + + +#define K8_NR_CSROWS 8 + + +/* K8 register addresses - device 0 function 1 - Address Map */ +#define K8_DBR 0x40 /* DRAM Base Register (8 x 32b + * interlaced with K8_DLR) + * + * 31:16 DRAM Base addr 39:24 + * 15:11 reserved + * 10:8 interleave enable + * 7:2 reserved + * 1 write enable + * 0 read enable + */ +#define K8_DLR 0x44 /* DRAM Limit Register (8 x 32b + * interlaced with K8_DBR) + * + * 31:16 DRAM Limit addr 32:24 + * 15:11 reserved + * 10:8 interleave select + * 7:3 reserved + * 2:0 destination node ID + */ + + +/* K8 register addresses - device 0 function 2 - DRAM controller */ +#define K8_DCSB 0x40 /* DRAM Chip-Select Base (8 x 32b) + * + * 31:21 Base addr high 35:25 + * 20:16 reserved + * 15:9 Base addr low 19:13 (interlvd) + * 8:1 reserved + * 0 chip-select bank enable + */ +#define K8_DCSM 0x60 /* DRAM Chip-Select Mask (8 x 32b) + * + * 31:30 reserved + * 29:21 addr mask high 33:25 + * 20:16 reserved + * 15:9 addr mask low 19:13 + * 8:0 reserved + */ + +#define K8_DBAM 0x80 /* DRAM Base Addr Mapping (32b) */ +#define K8_DCL 0x90 /* DRAM configuration low reg (32b) + * + * 31:28 reserved + * 27:25 Bypass Max: 000b=respect + * 24 Dissable receivers - no sockets + * 23:20 x4 DIMMS + * 19 32byte chunks + * 18 Unbuffered + 
* 17 ECC enabled + * 16 128/64 bit (dual/single chan) + * 15:14 R/W Queue bypass count + * 13 Self refresh + * 12 exit self refresh + * 11 mem clear status + * 10 DRAM enable + * 9 reserved + * 8 DRAM init + * 7:4 reserved + * 3 dis DQS hysteresis + * 2 QFC enabled + * 1 DRAM drive strength + * 0 Digital Locked Loop disable + */ + + +/* K8 register addresses - device 0 function 3 - Misc Control */ +#define K8_NBCTL 0x40 /* MCA NB Control (32b) + * + * 1 MCA UE Reporting + * 0 MCA CE Reporting + */ +#define K8_NBCFG 0x44 /* MCA NB Config (32b) + * + * 23 Chip-kill x4 ECC enable + * 22 ECC enable + * 1 CPU ECC enable + */ +#define K8_NBSL 0x48 /* MCA NB Status Low (32b) + * + * 31:24 Syndrome 15:8 chip-kill x4 + * 23:20 reserved + * 19:16 Extended err code + * 15:0 Err code + */ +#define K8_NBSH 0x4C /* MCA NB Status High (32b) + * + * 31 Err valid + * 30 Err overflow + * 29 Uncorrected err + * 28 Err enable + * 27 Misc err reg valid + * 26 Err addr valid + * 25 proc context corrupt + * 24:23 reserved + * 22:15 Syndrome 7:0 + * 14 CE + * 13 UE + * 12:9 reserved + * 8 err found by scrubber + * 7 reserved + * 6:4 Hyper-transport link number + * 3:2 reserved + * 1 Err CPU 1 + * 0 Err CPU 0 + */ +#define K8_NBEAL 0x50 /* MCA NB err addr low (32b) + * + * 31:3 Err addr low 31:3 + * 2:0 reserved + */ +#define K8_NBEAH 0x54 /* MCA NB err addr high (32b) + * + * 31:8 reserved + * 7:0 Err addr high 39:32 + */ +#define K8_NBCAP 0xE8 /* MCA NB capabilities (32b) + * + * 31:9 reserved + * 4 S4ECD4ED capable + * 3 SECDED capable + */ + + + /* MSR's */ + /* + * K8_MSR_MCxCTL (64b) + * (0x400,404,408,40C,410) + * 63 Enable reporting source 63 + * . + * . + * . 
+ * 2 Enable error source 2 + * 1 Enable error source 1 + * 0 Enable error source 0 + */ + /* + * K8_MSR_MCxSTAT (64b) + * (0x401,405,409,40D,411) + * 63 Error valid + * 62 Status overflow + * 61 UE + * 60 Enabled error condition + * 59 Misc register valid (not used) + * 58 Err addr register valid + * 57 Processor context corrupt + * 56:32 Other information + * 31:16 Model specific error code + * 15:0 MCA err code + */ + /* + * K8_MSR_MCxADDR (64b) + * (0x402,406,40A,40E,412) + * 63:48 reserved + * 47:0 Address + */ + /* + * K8_MSR_MCxMISC (64b) + * (0x403,407,40B,40F,413) + * Unused on Athlon64 and K8 + */ + +#define K8_MSR_MCGCTL 0x017b /* Machine Chk Global report ctl (64b) + * + * 31:5 reserved + * 4 North Bridge + * 3 Load/Store + * 2 Bus Unit + * 1 Instruction Cache + * 0 Data Cache + */ +#define K8_MSR_MC4CTL 0x0410 /* North Bridge Check report ctl (64b) */ +#define K8_MSR_MC4STAT 0x0411 /* North Bridge status (64b) */ +#define K8_MSR_MC4ADDR 0x0412 /* North Bridge Address (64b) */ + + +#define MCI2NID(mci) (PCI_SLOT(mci->pdev->devfn) - 0x18) + + +enum k8_chips { + OPTERON = 0, +}; + + +struct k8_pvt { + struct pci_dev *addr_map; + struct pci_dev *misc_ctl; +}; + + +struct k8_dev_info { + const char *ctl_name; + u16 addr_map; + u16 misc_ctl; +}; + + +static const struct k8_dev_info k8_devs[] = { + [OPTERON] = { + .ctl_name = "Athlon64/Opteron", + .addr_map = PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP, + .misc_ctl = PCI_DEVICE_ID_AMD_OPT_3_MISCCTL + }, +}; + + +static inline void pci_find_related_function( unsigned int vendor, + unsigned int device, + struct pci_dev **from, + struct pci_dev *related ) +{ + do { + *from = pci_find_device( vendor, device, *from ); + if ( ! 
*from ) return; + + if ( ((*from)->bus->number == related->bus->number) + && (PCI_SLOT((*from)->devfn) + == PCI_SLOT(related->devfn)) ) { + return; + } + } while ( 1 ); +} + + +/* FIXME - stolen from msr.c - the calls in msr.c could be exported */ +#ifdef CONFIG_SMP + +struct msr_command { + int cpu; + int err; + u32 reg; + u32 data[2]; +}; + + +static void msr_smp_wrmsr(void *cmd_block) +{ + struct msr_command *cmd = (struct msr_command *) cmd_block; + + debugf1( "MC: " __FILE__ ": %s(): %d ? %d\n", + __func__, cmd->cpu, smp_processor_id() ); + + if ( cmd->cpu == smp_processor_id() ) { + debugf1( "MC: " __FILE__ ": %s(): Matched %d\n", + __func__, cmd->cpu ); + wrmsr(cmd->reg, cmd->data[0], cmd->data[1]); + } +} + + +static void msr_smp_rdmsr(void *cmd_block) +{ + struct msr_command *cmd = (struct msr_command *) cmd_block; + + debugf1( "MC: " __FILE__ ": %s(): %d ? %d\n", + __func__, cmd->cpu, smp_processor_id() ); + + if ( cmd->cpu == smp_processor_id() ) { + debugf1( "MC: " __FILE__ ": %s(): Matched %d\n", + __func__, cmd->cpu ); + rdmsr(cmd->reg, cmd->data[0], cmd->data[1]); + } +} + + +static inline void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) +{ + struct msr_command cmd; + + debugf0( "MC: " __FILE__ ": %s(): %d\n", __func__, cpu ); + + if ( cpu == smp_processor_id() ) { + wrmsr(reg, eax, edx); + } else { + cmd.cpu = cpu; + cmd.reg = reg; + cmd.data[0] = eax; + cmd.data[1] = edx; + + smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); + } +} + + +static inline void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) /* reads MSR 'reg' on 'cpu' into *eax (low word) and *edx (high word) */ +{ + struct msr_command cmd; + + debugf0( "MC: " __FILE__ ": %s(): %d\n", __func__, cpu ); + + if ( cpu == smp_processor_id() ) { + rdmsr(reg, *eax, *edx); /* rdmsr() stores into its arguments: pass the caller's words, not the pointers */ + } else { + cmd.cpu = cpu; + cmd.reg = reg; + + smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); + + *eax = cmd.data[0]; + *edx = cmd.data[1]; + } +} + +#else /* !
CONFIG_SMP */ + +static inline void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) +{ + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + wrmsr(reg, eax, edx); +} + + +static inline void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) /* uniprocessor: 'cpu' is ignored, result goes to *eax / *edx */ +{ + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + rdmsr(reg, *eax, *edx); /* rdmsr() stores into its arguments: pass the caller's words, not the pointers */ +} + +#endif /* ! CONFIG_SMP */ + + +/* + * FIXME - This is a large chunk of memory to suck up just to decode the + * syndrome. It would be nice to discover a pattern in the syndromes that + * could be used to quickly identify the channel. The big problems with + * this table are memory usage, lookup speed (could sort and binary search), + * correctness (there could be a transcription error). A zero in any nibble + * for a syndrome is always channel 0, but that only decodes some of the + * syndromes. Can anyone find any other patterns? + */ +/* + * The comment in the left column is the nibble that is in error. The least + * significant nibble of the syndrome is the mask for the bits that are + * in error (need to be toggled) for the particular nibble.
+ */ +#define SYNDROME_TABLE_SIZE 270 /* 18 rows (nibbles) x 15 error masks; within a row the entry for mask m is the XOR of the entries for m's set bits */ +static const unsigned long syndromes_chan0[SYNDROME_TABLE_SIZE] = { + /*0*/ 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57, 0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df, + /*1*/ 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7, 0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f, + /*2*/ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + /*3*/ 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057, 0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df, + /*4*/ 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097, 0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f, + /*5*/ 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857, 0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf, + /*6*/ 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467, 0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f, + /*7*/ 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27, 0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff, + /*8*/ 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177, 0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f, + /*9*/ 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07, 0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f, + /*a*/ 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07, 0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f, + /*b*/ 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7, 0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f, + /*c*/ 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87, 0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x642c, 0x85fd, 0x164e, 0xf79f, + /*d*/ 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067, 0x20e8, 0x40b9, 0x904a, 0xf01b, 0x307c, 0x502d, 0x80de, 0xe08f, + /*e*/ 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77, 0x7b58, 0xdf99, 0x831a, 0x27db,
0x9dac, 0x396d, 0x65ee, 0xc12f, + /*f*/ 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77, 0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f, + + /*20*/ 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807, 0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f, + /*21*/ 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07, 0xac08, 0xed09, 0x2e0a, 0x6f0b, 0xf40c, 0xb50d, 0x760e, 0x370f +}; + +static const unsigned long syndromes_chan1[SYNDROME_TABLE_SIZE] = { /* same layout: within a row the entry for mask m is the XOR of the entries for m's set bits */ + /*10*/ 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187, 0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f, + /*11*/ 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627, 0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff, + /*12*/ 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97, 0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f, + /*13*/ 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0x8e97, 0x1aa8, 0xc7e9, 0x7c2a, 0xa16b, 0x2ffc, 0xf2bd, 0x497e, 0x943f, + /*14*/ 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987, 0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f, + /*15*/ 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677, 0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f, + /*16*/ 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387, 0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f, + /*17*/ 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17, 0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf, + /*18*/ 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7, 0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f, + /*19*/ 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397, 0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f, + /*1a*/ 0x9391, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617, 0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf, + /*1b*/ 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3155, 0xb9c6, 0x7527, 0x56d8, 0x9a39,
0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff, + /*1c*/ 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7, 0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef, + /*1d*/ 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7, 0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def, + /*1e*/ 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67, 0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f, + /*1f*/ 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377, 0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f, + + /*22*/ 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97, 0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f, + /*23*/ 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757, 0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df +}; + + +/* + * FIXME - either the above table is borken or something is incorrect with + * the way the syndrome is read out of the NB. + */ +static int chan_from_syndrome( unsigned long syndrome ) +{ + int i; + + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + for ( i = 0; i < SYNDROME_TABLE_SIZE; i++ ) { + if ( syndromes_chan0[i] == syndrome ) return 0; + if ( syndromes_chan1[i] == syndrome ) return 1; + } + + debugf0( "MC: " __FILE__ ": %s(): syndrome(%lx) not found\n", + __func__, syndrome ); + return -1; +} + + +static const char *tt_msgs[] = { /* transaction type */ + "inst", + "data", + "generic", + "reserved" +}; + + +static const char *ll_msgs[] = { /* cache level */ + "0", + "1", + "2", + "generic" +}; + + +static const char *memtt_msgs[] = { + "generic", + "generic read", + "generic write", + "data read", + "data write", + "inst fetch", + "prefetch", + "evict", + "snoop", + "unknown error 9", + "unknown error 10", + "unknown error 11", + "unknown error 12", + "unknown error 13", + "unknown error 14", + "unknown error 15" +}; + + +static const char *pp_msgs[] = { /* participating processor */ + "local node origin", + "local node response", + 
"local node observed", + "generic" +}; + + +static const char *to_msgs[] = { + "no timeout", + "timed out" +}; + + +static const char *ii_msgs[] = { /* memory or i/o */ + "mem access", + "reserved", + "i/o access", + "generic" +}; + + +static const char *ext_msgs[] = { /* extended error */ + "ECC error", + "CRC error", + "sync error", + "mst abort", + "tgt abort", + "GART error", + "RMW error", + "watchdog error", + "ECC chipkill x4 error", + "unknown error 9", + "unknown error 10", + "unknown error 11", + "unknown error 12", + "unknown error 13", + "unknown error 14", + "unknown error 15" +}; + + +static const char *htlink_msgs[] = { + "none", + "1", + "2", + "1 2", + "3", + "1 3", + "2 3", + "1 2 3" +}; + + +static inline void decode_gart_tlb_error( struct mem_ctl_info *mci, + u32 nbeah, u32 nbeal, + u32 nbsh, u32 nbsl, + u32 nbcfg ) +{ + u32 err_code; + u32 ec_tt; /* error code transaction type (2b) */ + u32 ec_ll; /* error code cache level (2b) */ + + debugf0( "MC%d: " __FILE__ ": %s(): FIXME\n", mci->mc_idx, __func__ ); + + err_code = nbsl & 0xffffUL; + ec_tt = ( err_code >> 2 ) & 0x03UL; + ec_ll = ( err_code >> 0 ) & 0x03UL; + + printk( "BS%d: GART TLB errorr:" + " transaction type(%s)," + " cache level(%s)\n", + mci->mc_idx, + tt_msgs[ec_tt], + ll_msgs[ec_ll] ); +} + + +static inline void decode_cache_error( struct mem_ctl_info *mci, + u32 nbeah, u32 nbeal, + u32 nbsh, u32 nbsl, + u32 nbcfg ) +{ + u32 err_code; + u32 ec_rrrr; /* error code memory transaction (4b) */ + u32 ec_tt; /* error code transaction type (2b) */ + u32 ec_ll; /* error code cache level (2b) */ + + debugf0( "MC%d: " __FILE__ ": %s(): FIXME\n", mci->mc_idx, __func__ ); + + err_code = nbsl & 0xffffUL; + ec_rrrr = ( err_code >> 4 ) & 0x0fUL; + ec_tt = ( err_code >> 2 ) & 0x03UL; + ec_ll = ( err_code >> 0 ) & 0x03UL; + + printk( "BS%d: cache heirarchy error:" + " memory transaction type(%s)," + " transaction type(%s)," + " cache level(%s)\n", + mci->mc_idx, + memtt_msgs[ ec_rrrr ], + tt_msgs[ 
ec_tt ], + ll_msgs[ ec_ll ] ); +} + + +static inline void decode_bus_error( struct mem_ctl_info *mci, + u32 nbeah, u32 nbeal, + u32 nbsh, u32 nbsl, + u32 nbcfg ) +{ + u32 page, offset; + u32 err_code, ext_ec; + int row = 0; + u32 ec_pp; /* error code participating processor (2p) */ + u32 ec_to; /* error code timed out (1b) */ + u32 ec_rrrr; /* error code memory transaction (4b) */ + u32 ec_ii; /* error code memory or I/O (2b) */ + u32 ec_ll; /* error code cache level (2b) */ + char msg[1024] = ""; + u32 msg_idx = 0; + + debugf0( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); + + msg_idx = snprintf( msg, 1024, "%s", BS_MOD_STR ); + + err_code = nbsl & 0xffffUL; + ec_pp = ( err_code >> 9 ) & 0x03UL; + ec_to = ( err_code >> 8 ) & 0x01UL; + ec_rrrr = ( err_code >> 4 ) & 0x0fUL; + ec_ii = ( err_code >> 2 ) & 0x03UL; + ec_ll = ( err_code >> 0 ) & 0x03UL; + + ext_ec = ( nbsl >> 16 ) & 0xfUL; + + /* FIXME - these should report through bluesmoke channels */ + + printk( "BS%d: general bus error:" + " participating processor(%s)," + " time-out(%s)," + " memory transaction type(%s)," + " mem or i/o(%s)," + " cache level(%s)\n", + mci->mc_idx, + pp_msgs[ ec_pp ], + to_msgs[ ec_to ], + memtt_msgs[ ec_rrrr ], + ii_msgs[ ec_ii ], + ll_msgs[ ec_ll ] ); + + /* FIXME - other errors should have other error handling mechanisms. 
*/ + if ( ( 0 != ext_ec ) && ( 0x8 != ext_ec ) ) { + printk( "BS%d: no special error handling for this error\n", + mci->mc_idx ); + return; + } + + if ( ec_pp & 0x02 ) { + /* We aren't the node involved */ + return; + } + + offset = nbeal & ~PAGE_MASK & ~0x7UL; + page = ( ( nbeah & 0xff ) << ( 32 - PAGE_SHIFT ) ) /* NBEAH[7:0] is address bits 39:32, i.e. page-number bits from (32 - PAGE_SHIFT) up */ + | ( ( nbeal & PAGE_MASK ) >> PAGE_SHIFT ); + + /* process any errors */ + if ( nbsh & BIT(14) ) { /* CE */ + unsigned long syndrome; + int chan = 0; + + syndrome = ( nbsh >> 15 ) & 0x00ffUL; /* bits 7:0 */ + if ( nbcfg & BIT(23) ) { + syndrome |= ( nbsl >> 16 ) & 0xff00UL; /* bits 15:8 */ + chan = chan_from_syndrome( syndrome ); + } + + if ( 0 > chan ) { + /* + * If the syndrome couldn't be found then + * the race condition for error reporting + * registers likely occurred. There's a lot + * more in doubt than just the channel. + * Might as well just log the error without + * any info. + */ + msg_idx += snprintf( &msg[ msg_idx ], 1024 - msg_idx, + " unknown syndrome 0x%lx - " + " possible error reporting race", + syndrome ); + bluesmoke_mc_handle_ce_no_info( mci, msg ); + } else if ( nbsh & BIT(26) ) { /* valid address? */ + row = bluesmoke_mc_find_csrow_by_page( mci, page ); + if ( -1 == row ) { + bluesmoke_mc_handle_ce_no_info( mci, msg ); + } else { + bluesmoke_mc_handle_ce( mci, page, offset, + syndrome, row, chan, + msg ); + } + } else { + bluesmoke_mc_handle_ce_no_info( mci, msg ); + } + } else if ( nbsh & BIT(13) ) { /* UE */ + if ( nbsh & BIT(26) ) { /* valid address? */ + row = bluesmoke_mc_find_csrow_by_page( mci, page ); + if ( -1 == row ) { + bluesmoke_mc_handle_ue_no_info( mci, msg ); + } else { + bluesmoke_mc_handle_ue( mci, page, offset, + row, msg ); + } + } else { + bluesmoke_mc_handle_ue_no_info( mci, msg ); + } + } + + if ( nbsh & BIT(30) ) { + /* + * If main error is CE then overflow must be CE. + * If main error is UE then overflow is unknown.
+ * We'll call the overflow a CE - if panic_on_ue + * is set then we're already panic'ed and won't + * arrive here. If panic_on_ue is not set then + * apparently someone doesn't think that + * UE's are catastrophic. + */ + bluesmoke_mc_handle_ce_no_info( mci, BS_MOD_STR ); + } +} + + +static void k8_check(struct mem_ctl_info *mci) +{ + struct k8_pvt *pvt = (struct k8_pvt *)mci->pvt_info; + u32 nbsl1, nbsh1, nbeal1, nbeah1, nbcfg1; + u32 nbsl2, nbsh2, nbeal2, nbeah2, nbcfg2; + u32 err_code; + u32 ext_ec; + + debugf1( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); + + /* check for an error */ + pci_read_config_dword(pvt->misc_ctl, K8_NBSH, &nbsh1); + if ( ! (nbsh1 & BIT(31) ) ) { /* err valid? */ + return; + } + + /* might as well slurp in everything at once */ + pci_read_config_dword(pvt->misc_ctl, K8_NBSL, &nbsl1); + pci_read_config_dword(pvt->misc_ctl, K8_NBEAL, &nbeal1); + pci_read_config_dword(pvt->misc_ctl, K8_NBEAH, &nbeah1); + pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &nbcfg1); + debugf1( KERN_WARNING + "NorthBridge ERROR: mci(0x%p) node(%d) nbeah(0x%.8x)" + " nbeal(0x%.8x) nbsh(0x%.8x) nbsl(0x%.8x): ", + mci, MCI2NID(mci), nbeah1, nbeal1, nbsh1, nbsl1 ); + + /* + * Here's the problem with the K8's EDAC reporting: + * There are four registers which report pieces of error + * information. These four registers are shared between + * CEs and UEs. Furthermore, contrary to what is stated in + * the OBKG, the overflow bit is never used! Every error + * always updates the reporting registers. + * + * Can you see the race condition? All four error reporting + * registers must be read before a new error updates them! + * There is no way to read all four registers atomically. The + * best than can be done is to detect that a race has occured + * and then report the error without any kind of precision. 
+ * + * What is still positive is that errors are + * still reported and thus problems can still be detected - + * just not localized because the syndrome and address are + * spread out across registers. + * + * Grrrrr!!!!! Here's hoping that AMD fixes this in some + * future K8 rev. UEs and CEs should have separate + * register sets with proper overflow bits that are used! + * At very least the problem can be fixed by honoring the + * ErrValid bit in nbsh and not updating registers - just + * set the overflow bit - unless the current error is CE + * and the new error is UE which would be the only situation + * for overwriting the current values. + */ + pci_read_config_dword(pvt->misc_ctl, K8_NBSH, &nbsh2); + pci_read_config_dword(pvt->misc_ctl, K8_NBSL, &nbsl2); + pci_read_config_dword(pvt->misc_ctl, K8_NBEAL, &nbeal2); + pci_read_config_dword(pvt->misc_ctl, K8_NBEAH, &nbeah2); + pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &nbcfg2); + debugf1( KERN_WARNING + "NorthBridge ERROR2: mci(0x%p) node(%d) nbeah2(0x%.8x)" + " nbeal2(0x%.8x) nbsh2(0x%.8x) nbsl2(0x%.8x): ", + mci, MCI2NID(mci), nbeah2, nbeal2, nbsh2, nbsl2 ); + + /* clear the error */ + pci_write_bits32( pvt->misc_ctl, K8_NBSH, 0, BIT(31) ); + + if ( ( nbsh1 != nbsh2 ) + || ( nbsl1 != nbsl2 ) + || ( nbeah1 != nbeah2 ) + || ( nbeal1 != nbeal2 ) ) { + printk( KERN_WARNING "MC%d: race condition detected!\n", + mci->mc_idx ); + } + + err_code = nbsl2 & 0xffffUL; + ext_ec = (nbsl2 >> 16) & 0x0fUL; + + /* Use info from the second read - most current */ + if ( 0x0010UL == ( err_code & 0xfff0UL ) ) { + debugf1( "GART TLB error\n" ); + decode_gart_tlb_error( mci, nbeah2, nbeal2, nbsh2, nbsl2, nbcfg2 ); + } else if ( 0x0100UL == ( err_code & 0xff00UL ) ) { + debugf1( "Cache error\n" ); + decode_cache_error( mci, nbeah2, nbeal2, nbsh2, nbsl2, nbcfg2 ); + } else if ( 0x0800UL == ( err_code & 0xf800UL ) ) { + debugf1( "Bus error\n" ); + decode_bus_error( mci, nbeah2, nbeal2, nbsh2, nbsl2, nbcfg2 ); + } else { + /* 
shouldn't reach here! */ + printk( KERN_WARNING "MC%d: " __FILE__ + ": %s(): unknown MCE error 0x%x\n", + mci->mc_idx, __func__, err_code ); + } + + printk( "BS%d: extended error code: %s\n", + mci->mc_idx, + ext_msgs[ ext_ec ] ); + + if ( ((ext_ec >=1 && ext_ec <= 4) || (ext_ec == 6)) + && ((nbsh2 >> 4) & 0x07UL) ) { /* link field is NBSH bits 6:4 - three bits, one per link */ + /* need to decode which hypertransport link had the error */ + u32 htln = (nbsh2 >> 4) & 0x07UL; /* indexes the 8-entry htlink_msgs[] */ + printk( "BS%d: Error on hypertransport link: %s\n", + mci->mc_idx, htlink_msgs[ htln ] ); + } + + /* + * If the processor context is corrupt or the error is + * uncorrectable then panic - why would you want to continue + * with something seriously broken? + */ + if ( nbsh2 & ( BIT(29) | BIT(25) ) ) { + if ( nbsh2 & BIT(29) ) + printk( "BS%d: uncorrected error\n", mci->mc_idx ); + + if ( nbsh2 & BIT(25) ) + printk( "BS%d: processor context corrupt\n", + mci->mc_idx ); + + panic( "BS%d: cannot recover\n", mci->mc_idx ); + } +} + + +static int k8_probe1( struct pci_dev *pdev, int dev_idx ) +{ + int rc = -ENODEV; + int index; + struct mem_ctl_info *mci = NULL; + struct k8_pvt *pvt = NULL; + int nid; + u32 dram_pg_base = 0; + u32 dram_pg_limit = 0; + u32 dcl; + u32 dcl_chans; + u32 dcl_unbuf; + u32 dcl_x4; + u32 dcl_eccen; + u32 dbam; + u32 nbcfg; + u32 nbcfg_ckx4en; + u32 nbcfg_eccen; + u32 nbcap; + u32 nbcap_ckx4; + u32 nbcap_ecc; + u32 csrows_loaded = 0; + u32 mcgctl_l, mcgctl_h; + u32 mc4ctl_l, mc4ctl_h; + const struct k8_dev_info *k8_dev = &k8_devs[dev_idx]; + + debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); + + pci_read_config_dword(pdev, K8_DCL, &dcl); + dcl_chans = ( dcl >> 16 ) & 0x1; + dcl_unbuf = ( dcl >> 18 ) & 0x1; + dcl_x4 = ( dcl >> 20 ) & 0xf; + dcl_eccen = ( dcl >> 17 ) & 0x1; + pci_read_config_dword(pdev, K8_DBAM, &dbam); + + mci = bluesmoke_mc_init_structs(sizeof(*pvt), + K8_NR_CSROWS, + dcl_chans + 1); + + if ( !
mci ) { + rc = -ENOMEM; + goto FAIL_FINISHED; + } + + debugf0( "MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci ); + + pvt = (struct k8_pvt *)mci->pvt_info; + + mci->pdev = pdev; + nid = MCI2NID(mci); + + /* setup private structure */ + /* + * The address mapping device provides a table that indicates + * which physical address ranges are owned by which node. + * Each node's memory controller has memory controller addresses + * that begin at 0x0. Locally, the memory controller address + * must be added to the mapping device address to convert to + * physical address. + */ + pci_find_related_function( PCI_VENDOR_ID_AMD, + k8_dev->addr_map, + &pvt->addr_map, + mci->pdev ); + + if ( ! pvt->addr_map ) { + printk( KERN_ERR + "MC: error address map device not found:" + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_AMD, + k8_dev->addr_map ); + goto FAIL_FINISHED; + } + + debugf1( "Addr Map device PCI Bus ID:\t%s\n", pvt->addr_map->name ); + + /* + * Sift through address mapper DRAM table - the documentation isn't + * explicit, but it is believed to be an error if there are multiple + * entries for the same node. + */ + for ( index = 0; index < 8; index++ ) { + u32 dbr; + u32 dbr_base = 0; + u32 dbr_inten; + u32 dbr_wen; + u32 dbr_ren; + u32 dlr; + u32 dlr_limit = 0; + u32 dlr_intsel; + u32 dlr_nid; + + pci_read_config_dword( pvt->addr_map, + K8_DLR + (8 * index), + &dlr ); + + dlr_nid = dlr & 0x7; + + if ( dlr_nid != nid ) continue; + + /* + * dlr_limit has all the low-order bits 1 while dbr_base + * has all the low-order bits 0. Here we do some bit + * jockeying to set all the low-order bits of dlr_limit. 
+ */ + dlr_limit = ((((dlr >> 16) & 0xffff) + 1) + << (24 - PAGE_SHIFT)) - 1; + dlr_intsel = (dlr >> 8) & 0x7; /* interleave select is DLR bits 10:8 */ + + pci_read_config_dword( pvt->addr_map, + K8_DBR + (8 * index), + &dbr ); + + dbr_base = ((dbr >> 16) & 0xffff) << (24 - PAGE_SHIFT); + dbr_inten = (dbr >> 8) & 0x7; + dbr_wen = (dbr >> 1) & 0x1; + dbr_ren = dbr & 0x1; + + debugf1( "\tAddr Map: %d:0x%x - 0x%x\n", + dlr_nid, dbr_base, dlr_limit ); + + if ( dram_pg_limit ) { + printk( KERN_ERR + "MC: multiple entries for node %d found" + " in Address Mapping device %s:" + " PROBE FAILED!\n", + nid, pci_name(pvt->addr_map) ); /* misc_ctl has not been looked up yet; the message is about addr_map */ + goto FAIL_FINISHED; + } + + dram_pg_limit = dlr_limit; + dram_pg_base = dbr_base; + } + + if (! dram_pg_limit) { + printk( KERN_ERR + "MC: no DRAM entry found for node %d in Address" + " Mapping device: %s: PROBE FAILED!\n", + nid, pci_name(pvt->addr_map) ); /* misc_ctl has not been looked up yet; the message is about addr_map */ + goto FAIL_FINISHED; + } + + pci_find_related_function( PCI_VENDOR_ID_AMD, + k8_dev->misc_ctl, + &pvt->misc_ctl, + mci->pdev ); + + if ( ! pvt->misc_ctl ) { + printk( KERN_ERR + "MC: error miscellaneous device not found:" + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_AMD, + k8_dev->misc_ctl ); + goto FAIL_FINISHED; + } + + debugf1( "Misc device PCI Bus ID:\t\t%s\n", + pvt->misc_ctl->name ); /* single string argument, same as the Addr Map debug line */ + + pci_read_config_dword( pvt->misc_ctl, K8_NBCFG, &nbcfg ); + nbcfg_ckx4en = nbcfg & BIT(23); + nbcfg_eccen = nbcfg & BIT(22); + + mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR; + + pci_read_config_dword( pvt->misc_ctl, K8_NBCAP, &nbcap ); + nbcap_ckx4 = ( nbcap >> 4 ) & 0x1; + nbcap_ecc = ( nbcap >> 3 ) & 0x1; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + if ( nbcap_ecc ) mci->edac_ctl_cap |= EDAC_FLAG_SECDED; + if ( nbcap_ckx4 ) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; + + mci->edac_cap = EDAC_FLAG_NONE; + if ( dcl_eccen ) { + mci->edac_cap |= EDAC_FLAG_SECDED; + if ( dcl_chans ) { + mci->edac_cap |= EDAC_FLAG_S4ECD4ED; + } + } + + mci->mod_name = BS_MOD_STR; + mci->mod_ver = "$Revision: 1.6 $"; + mci->ctl_name =
k8_devs[dev_idx].ctl_name; + mci->edac_check = k8_check; + mci->clear_err = NULL; + mci->ctl_page_to_phys = NULL; + + for ( index = 0; index < mci->nr_csrows; index++ ) { + struct csrow_info *csrow = &mci->csrows[ index ]; + u32 dcsb; + u32 dcsb_bah; + u32 dcsb_bal; + u32 dcsm; + u32 dcsm_amh; + u32 dcsm_aml; + u32 aml; + u32 device_shift = 0; + u32 intlv_shift = 0; + int i; + + /* find the DRAM Chip Select Base address for this row */ + pci_read_config_dword(mci->pdev, K8_DCSB + (index*4), &dcsb); + if ( ! (dcsb & 0x1) ) { + continue; /* empty */ + } + csrows_loaded++; + dcsb_bal = ((dcsb >> 9) & 0x7fUL) << (13 - PAGE_SHIFT); + dcsb_bah = ((dcsb >> 21) & 0x7ffUL) << (25 - PAGE_SHIFT); + + pci_read_config_dword(mci->pdev, K8_DCSM + (index*4), &dcsm); + dcsm_aml = ((~dcsm >> 9) & 0x7fUL) << (13 - PAGE_SHIFT); + dcsm_amh = ((dcsm >> 21) & 0x1ffUL) << (25 - PAGE_SHIFT); + + debugf2( "\t%d: dcsb(%x) dcsm(%x)\n", index, dcsb, dcsm ); + + /* 25 is 32MiB minimum DIMM size */ + csrow->first_page = (dcsb_bah | dcsb_bal) + dram_pg_base; + csrow->nr_pages = 1 << ((( dbam >> ((index / 2)*4) ) & 0x7) + + 25 - PAGE_SHIFT + dcl_chans); + if ( dcsm_aml ) { + aml = dcsm_aml; + i = 0; + while ( ! (aml & 0x1UL) ) { + i++; + aml >>= 1; + } + device_shift = i; + + i = 0; + while ( aml & 0x1UL ) { + i++; + aml >>= 1; + } + intlv_shift = i; + + csrow->last_page = csrow->first_page + + ( csrow->nr_pages << intlv_shift ) + - ( (1 << device_shift) | 0x1UL ); + } else { + csrow->last_page = csrow->first_page + + csrow->nr_pages - 1; + } + + csrow->page_mask = dcsm_aml; + csrow->grain = 8; /* 8 bytes of resolution */ + csrow->mtype = dcl_unbuf ? 
MEM_DDR : MEM_RDDR;
+		if ( ( dcl_x4 >> (index / 2 ) ) & 0x1 ) {
+			csrow->dtype = DEV_X4;
+		} else {
+			csrow->dtype = DEV_UNKNOWN;
+		}
+
+		if ( nbcfg_eccen ) {
+			if ( nbcfg_ckx4en ) {
+				csrow->edac_mode = EDAC_S4ECD4ED;
+			} else {
+				csrow->edac_mode = EDAC_SECDED;
+			}
+		} else {
+			csrow->edac_mode = EDAC_NONE;
+		}
+	}
+
+	/* clear any pending errors, or initial state bits */
+	/* FIXME - should log what is already there */
+	pci_write_bits32( pvt->misc_ctl, K8_NBSH, 0, BIT(31) );
+
+	if ( ! csrows_loaded ) {
+		mci->edac_cap = EDAC_FLAG_NONE;
+	} else {
+		/* turn on error reporting */
+		pci_write_bits32( pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL );
+		/* NOTE(review): the write below repeats the one above verbatim */
+		pci_write_bits32( pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL );
+
+		do_rdmsr( nid, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h );
+		mc4ctl_l |= BIT(0) | BIT(1);
+		do_wrmsr( nid, K8_MSR_MC4CTL, mc4ctl_l, mc4ctl_h );
+		do_rdmsr( nid, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h );
+
+		do_rdmsr( nid, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h );
+		mcgctl_l |= BIT(4);
+		do_wrmsr( nid, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h );
+		do_rdmsr( nid, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h );
+	}
+
+	if ( 0 != bluesmoke_mc_add_mc( mci ) ) {
+		debugf3( "MC: " __FILE__
+			 ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ );
+		goto FAIL_FINISHED;
+	}
+
+	/* get this far and it's successful */
+	debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ );
+	rc = 0;
+	goto FINISHED;
+
+ FAIL_FINISHED:
+	if ( mci ) {
+		kfree( mci );
+	}
+
+ FINISHED:
+	return( rc );
+}
+
+
+#ifdef CONFIG_PM
+/* PM suspend hook: unimplemented (FIXME); logs the call and returns -ENOSYS */
+static int k8_suspend (struct pci_dev *pdev, u32 state)
+{
+	debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ );
+
+	return -ENOSYS;
+}
+
+/* PM resume hook: unimplemented (FIXME); logs the call and returns -ENOSYS */
+static int k8_resume (struct pci_dev *pdev)
+{
+	debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ );
+
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_PM */
+
+
+/* PCI probe hook: -EIO if pdev cannot be enabled, else the k8_probe1() result */
+static int __devinit k8_init_one( struct pci_dev *pdev,
+				  const struct pci_device_id *ent )
+{
+	int rc;
+
+	debugf0( "MC: " __FILE__ ": %s()\n", __func__ );
+
+	/* wake up and enable device */
+	if (pci_enable_device (pdev)) {
+		rc = -EIO;
+	} else {
+		rc = k8_probe1( pdev, ent->driver_data );
+	}
+	return rc;
+}
+
+/* PCI remove hook: find the mci registered for pdev, unregister it, free it */
+static void __devexit k8_remove_one( struct pci_dev *pdev )
+{
+	struct mem_ctl_info *mci;
+
+	debugf0( __FILE__ ": %s()\n", __func__);
+	/* nothing to do when no controller is registered for this device */
+	if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) {
+		goto FINISHED;
+	}
+
+	if ( 0 != bluesmoke_mc_del_mc( mci ) ) {
+		goto FINISHED;
+	}
+	/* reached only when bluesmoke_mc_del_mc() returned 0; otherwise mci is kept */
+	kfree( mci );
+
+ FINISHED:
+	return;
+}
+
+/* PCI device IDs this driver binds to (used as k8_driver.id_table) */
+static const struct pci_device_id k8_pci_tbl[] __devinitdata = {
+	{ PCI_VEND_DEV( AMD, OPT_2_MEMCTL ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, OPTERON },
+	{0,} /* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, k8_pci_tbl);
+
+/* PCI driver glue: probe/remove always, suspend/resume only with CONFIG_PM */
+static struct pci_driver k8_driver = {
+	.name = BS_MOD_STR,
+	.probe = k8_init_one,
+	.remove = __devexit_p(k8_remove_one),
+	.id_table = k8_pci_tbl,
+#ifdef CONFIG_PM
+	.suspend = k8_suspend,
+	.resume = k8_resume,
+#endif /* CONFIG_PM */
+};
+
+/* Module entry: registers k8_driver; returns 0, or pci_module_init()'s negative code */
+int __init k8_init(void)
+{
+	int pci_rc;
+
+	debugf3( "MC: " __FILE__ ": %s()\n", __func__ );
+	pci_rc = pci_module_init( &k8_driver );
+	if ( pci_rc < 0 ) return pci_rc;
+
+	return 0;
+}
+
+/* Module exit: unregisters k8_driver from the PCI core */
+static void __exit k8_exit(void)
+{
+	debugf3( "MC: " __FILE__ ": %s()\n", __func__ );
+	pci_unregister_driver( &k8_driver );
+}
+
+
+module_init(k8_init);
+module_exit(k8_exit);
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
+MODULE_DESCRIPTION("MC support for AMD K8 memory controllers");
Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_mc.c
===================================================================
--- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_mc.c 1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_mc.c 2004-12-17 12:46:23.000000000 -0500
@@ -0,0 +1,1112 @@
+/*
+ * bluesmoke_mc kernel module
+ * (C) 2003
Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Thayne Harbaugh + * Based on work by Dan Hollis and others. + * http://www.anime.net/~goemon/linux-ecc/ + * + * $Id: bluesmoke_mc.c,v 1.9 2004/12/13 22:19:40 thayne Exp $ + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bluesmoke_mc.h" + + +#ifndef pfn_to_page +#define pfn_to_page(pfn) (mem_map + (pfn)) +#endif /* pfn_to_page */ + +#define MC_PROC_DIR "mc" + +/* /proc/mc dir */ +static struct proc_dir_entry *proc_mc; + +/* Setable by module parameter and sysctl */ +#if SCRUB +/* FIXME - do something with scrubbing */ +static int mc_scrub = -1; +#endif /* SCRUB */ +static int panic_on_ue = 1; +static int log_ue = 1; +static int log_ce = 1; +static int poll_msec = 1000; +static struct timer_list timer; + +static DECLARE_MUTEX(mem_ctls_mutex); + +/* FIXME - use list.h */ +/* FIXME - should be dynamic */ +static struct mem_ctl_info *mcis[MAX_MC_DEVICES]; + + +#ifdef CONFIG_SYSCTL +static void dimm_labels( char *buf, void *data ) +{ + int mcidx, ridx, chidx; + char *mcstr, *rstr, *chstr, *lstr, *p; + + lstr = buf; + + mcstr = strsep( &lstr, "." ); + if (! lstr) + return; + mcidx = simple_strtol( mcstr, &p, 0 ); + if ( *p ) + return; + if ( mcidx >= MAX_MC_DEVICES || ! mcis[mcidx] ) + return; + + rstr = strsep( &lstr, "." ); + if (! lstr) + return; + ridx = simple_strtol( rstr, &p, 0 ); + if ( *p ) + return; + if ( ridx >= mcis[mcidx]->nr_csrows + || ! mcis[mcidx]->csrows ) + return; + + chstr = strsep( &lstr, ":" ); + if (! lstr) + return; + chidx = simple_strtol( chstr, &p, 0 ); + if ( *p ) + return; + if ( chidx >= mcis[mcidx]->csrows[ridx].nr_channels + || ! 
mcis[mcidx]->csrows[ridx].channels ) + return; + + debugf1( "%d:%d.%d:%s\n", + mcidx, ridx, chidx, lstr ); + + strncpy(mcis[mcidx]->csrows[ridx].channels[chidx].label, + lstr, BLUESMOKE_MC_LABEL_LEN + 1); + /* + * no need to NUL terminate label since + * get_user_tok() NUL terminates. + */ +} + + +static void counter_reset( char *buf, void *data ) +{ + char *p = buf; + int mcidx, row, chan; + struct mem_ctl_info *mci; + + mcidx = simple_strtol( buf, &p, 0 ); + if ( *p ) + return; + if ( mcidx >= MAX_MC_DEVICES || ! mcis[mcidx] ) + return; + + mci = mcis[mcidx]; + mci->ue_noinfo_count = 0; + mci->ce_noinfo_count = 0; + mci->ue_count = 0; + mci->ce_count = 0; + for ( row = 0; row < mci->nr_csrows; row++ ) { + struct csrow_info *ri = &mci->csrows[row]; + + ri->ue_count = 0; + ri->ce_count = 0; + for ( chan = 0; chan < ri->nr_channels; chan++ ) { + ri->channels[chan].ce_count = 0; + } + } + do_gettimeofday( &mci->tv ); +} + + +struct actionvec_info { + void (*action)(char *str, void *data); + char separator; + char *usage; + void *data; +}; + + +static struct actionvec_info dimm_labels_avi = { + .action = dimm_labels, + .separator = ',', + .usage = "..: