   1/*
   2 * acenic.c: Linux driver for the Alteon AceNIC Gigabit Ethernet card
   3 *           and other Tigon based cards.
   4 *
   5 * Copyright 1998-2002 by Jes Sorensen, <jes@trained-monkey.org>.
   6 *
   7 * Thanks to Alteon and 3Com for providing hardware and documentation
   8 * enabling me to write this driver.
   9 *
  10 * A mailing list for discussing the use of this driver has been
   11 * set up; please subscribe to the lists if you have any questions
  12 * about the driver. Send mail to linux-acenic-help@sunsite.auc.dk to
  13 * see how to subscribe.
  14 *
  15 * This program is free software; you can redistribute it and/or modify
  16 * it under the terms of the GNU General Public License as published by
  17 * the Free Software Foundation; either version 2 of the License, or
  18 * (at your option) any later version.
  19 *
  20 * Additional credits:
  21 *   Pete Wyckoff <wyckoff@ca.sandia.gov>: Initial Linux/Alpha and trace
  22 *       dump support. The trace dump support has not been
  23 *       integrated yet however.
  24 *   Troy Benjegerdes: Big Endian (PPC) patches.
  25 *   Nate Stahl: Better out of memory handling and stats support.
  26 *   Aman Singla: Nasty race between interrupt handler and tx code dealing
  27 *                with 'testing the tx_ret_csm and setting tx_full'
  28 *   David S. Miller <davem@redhat.com>: conversion to new PCI dma mapping
  29 *                                       infrastructure and Sparc support
  30 *   Pierrick Pinasseau (CERN): For lending me an Ultra 5 to test the
  31 *                              driver under Linux/Sparc64
  32 *   Matt Domsch <Matt_Domsch@dell.com>: Detect Alteon 1000baseT cards
  33 *                                       ETHTOOL_GDRVINFO support
  34 *   Chip Salzenberg <chip@valinux.com>: Fix race condition between tx
  35 *                                       handler and close() cleanup.
  36 *   Ken Aaker <kdaaker@rchland.vnet.ibm.com>: Correct check for whether
  37 *                                       memory mapped IO is enabled to
  38 *                                       make the driver work on RS/6000.
  39 *   Takayoshi Kouchi <kouchi@hpc.bs1.fc.nec.co.jp>: Identifying problem
  40 *                                       where the driver would disable
  41 *                                       bus master mode if it had to disable
  42 *                                       write and invalidate.
  43 *   Stephen Hack <stephen_hack@hp.com>: Fixed ace_set_mac_addr for little
  44 *                                       endian systems.
  45 *   Val Henson <vhenson@esscom.com>:    Reset Jumbo skb producer and
  46 *                                       rx producer index when
  47 *                                       flushing the Jumbo ring.
  48 *   Hans Grobler <grobh@sun.ac.za>:     Memory leak fixes in the
  49 *                                       driver init path.
  50 *   Grant Grundler <grundler@cup.hp.com>: PCI write posting fixes.
  51 */
  52
  53#include <linux/module.h>
  54#include <linux/moduleparam.h>
  55#include <linux/types.h>
  56#include <linux/errno.h>
  57#include <linux/ioport.h>
  58#include <linux/pci.h>
  59#include <linux/dma-mapping.h>
  60#include <linux/kernel.h>
  61#include <linux/netdevice.h>
  62#include <linux/etherdevice.h>
  63#include <linux/skbuff.h>
  64#include <linux/init.h>
  65#include <linux/delay.h>
  66#include <linux/mm.h>
  67#include <linux/highmem.h>
  68#include <linux/sockios.h>
  69#include <linux/firmware.h>
  70#include <linux/slab.h>
  71#include <linux/prefetch.h>
  72#include <linux/if_vlan.h>
  73
  74#ifdef SIOCETHTOOL
  75#include <linux/ethtool.h>
  76#endif
  77
  78#include <net/sock.h>
  79#include <net/ip.h>
  80
  81#include <asm/system.h>
  82#include <asm/io.h>
  83#include <asm/irq.h>
  84#include <asm/byteorder.h>
  85#include <asm/uaccess.h>
  86
  87
  88#define DRV_NAME "acenic"
  89
  90#undef INDEX_DEBUG
  91
  92#ifdef CONFIG_ACENIC_OMIT_TIGON_I
  93#define ACE_IS_TIGON_I(ap)	0
  94#define ACE_TX_RING_ENTRIES(ap)	MAX_TX_RING_ENTRIES
  95#else
  96#define ACE_IS_TIGON_I(ap)	(ap->version == 1)
  97#define ACE_TX_RING_ENTRIES(ap)	ap->tx_ring_entries
  98#endif
  99
 100#ifndef PCI_VENDOR_ID_ALTEON
 101#define PCI_VENDOR_ID_ALTEON		0x12ae
 102#endif
 103#ifndef PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE
 104#define PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE  0x0001
 105#define PCI_DEVICE_ID_ALTEON_ACENIC_COPPER 0x0002
 106#endif
 107#ifndef PCI_DEVICE_ID_3COM_3C985
 108#define PCI_DEVICE_ID_3COM_3C985	0x0001
 109#endif
 110#ifndef PCI_VENDOR_ID_NETGEAR
 111#define PCI_VENDOR_ID_NETGEAR		0x1385
 112#define PCI_DEVICE_ID_NETGEAR_GA620	0x620a
 113#endif
 114#ifndef PCI_DEVICE_ID_NETGEAR_GA620T
 115#define PCI_DEVICE_ID_NETGEAR_GA620T	0x630a
 116#endif
 117
 118
 119/*
 120 * Farallon used the DEC vendor ID by mistake and they seem not
 121 * to care - stinky!
 122 */
 123#ifndef PCI_DEVICE_ID_FARALLON_PN9000SX
 124#define PCI_DEVICE_ID_FARALLON_PN9000SX	0x1a
 125#endif
 126#ifndef PCI_DEVICE_ID_FARALLON_PN9100T
 127#define PCI_DEVICE_ID_FARALLON_PN9100T  0xfa
 128#endif
 129#ifndef PCI_VENDOR_ID_SGI
 130#define PCI_VENDOR_ID_SGI		0x10a9
 131#endif
 132#ifndef PCI_DEVICE_ID_SGI_ACENIC
 133#define PCI_DEVICE_ID_SGI_ACENIC	0x0009
 134#endif
 135
 136static DEFINE_PCI_DEVICE_TABLE(acenic_pci_tbl) = {
 137	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE,
 138	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 139	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_COPPER,
 140	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 141	{ PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C985,
 142	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 143	{ PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620,
 144	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 145	{ PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620T,
 146	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 147	/*
 148	 * Farallon used the DEC vendor ID on their cards incorrectly,
 149	 * then later Alteon's ID.
 150	 */
 151	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_FARALLON_PN9000SX,
 152	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 153	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_FARALLON_PN9100T,
 154	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 155	{ PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_ACENIC,
 156	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 157	{ }
 158};
 159MODULE_DEVICE_TABLE(pci, acenic_pci_tbl);
 160
 161#define ace_sync_irq(irq)	synchronize_irq(irq)
 162
 163#ifndef offset_in_page
 164#define offset_in_page(ptr)	((unsigned long)(ptr) & ~PAGE_MASK)
 165#endif
 166
 167#define ACE_MAX_MOD_PARMS	8
 168#define BOARD_IDX_STATIC	0
 169#define BOARD_IDX_OVERFLOW	-1
 170
 171#include "acenic.h"
 172
 173/*
 174 * These must be defined before the firmware is included.
 175 */
 176#define MAX_TEXT_LEN	96*1024
 177#define MAX_RODATA_LEN	8*1024
 178#define MAX_DATA_LEN	2*1024
 179
 180#ifndef tigon2FwReleaseLocal
 181#define tigon2FwReleaseLocal 0
 182#endif
 183
 184/*
 185 * This driver currently supports Tigon I and Tigon II based cards
 186 * including the Alteon AceNIC, the 3Com 3C985[B] and NetGear
 187 * GA620. The driver should also work on the SGI, DEC and Farallon
 188 * versions of the card, however I have not been able to test that
 189 * myself.
 190 *
 191 * This card is really neat, it supports receive hardware checksumming
 192 * and jumbo frames (up to 9000 bytes) and does a lot of work in the
 193 * firmware. Also the programming interface is quite neat, except for
 194 * the parts dealing with the i2c eeprom on the card ;-)
 195 *
 196 * Using jumbo frames:
 197 *
 198 * To enable jumbo frames, simply specify an mtu between 1500 and 9000
 199 * bytes to ifconfig. Jumbo frames can be enabled or disabled at any time
 200 * by running `ifconfig eth<X> mtu <MTU>' with <X> being the Ethernet
 201 * interface number and <MTU> being the MTU value.
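 *
 * For example, assuming the first interface is eth0:
 *
 *        ifconfig eth0 mtu 9000    (enable 9000 byte jumbo frames)
 *        ifconfig eth0 mtu 1500    (return to standard frames)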
 202 *
 203 * Module parameters:
 204 *
 205 * When compiled as a loadable module, the driver allows for a number
 206 * of module parameters to be specified. The driver supports the
 207 * following module parameters:
 208 *
 209 *  trace=<val> - Firmware trace level. This requires special traced
 210 *                firmware to replace the firmware supplied with
 211 *                the driver - for debugging purposes only.
 212 *
 213 *  link=<val>  - Link state. Normally you want to use the default link
 214 *                parameters set by the driver. This can be used to
 215 *                override these in case your switch doesn't negotiate
 216 *                the link properly. Valid values are:
 217 *         0x0001 - Force half duplex link.
 218 *         0x0002 - Do not negotiate line speed with the other end.
 219 *         0x0010 - 10Mbit/sec link.
 220 *         0x0020 - 100Mbit/sec link.
 221 *         0x0040 - 1000Mbit/sec link.
 222 *         0x0100 - Do not negotiate flow control.
 223 *         0x0200 - Enable RX flow control Y
 224 *         0x0400 - Enable TX flow control Y (Tigon II NICs only).
  225 *                Default value is 0x0270, ie. enable link and flow control
  226 *                negotiation, negotiating the highest possible link speed
  227 *                with RX flow control enabled (see the worked breakdown below this list).
 228 *
 229 *                When disabling link speed negotiation, only one link
 230 *                speed is allowed to be specified!
 231 *
 232 *  tx_coal_tick=<val> - number of coalescing clock ticks (us) allowed
  233 *                to wait for more packets to arrive before
 234 *                interrupting the host, from the time the first
 235 *                packet arrives.
 236 *
  237 *  rx_coal_tick=<val> - number of coalescing clock ticks (us) allowed
  238 *                to wait for more packets to arrive in the receive ring,
  239 *                before interrupting the host, after receiving the
  240 *                first packet in the ring.
 241 *
 242 *  max_tx_desc=<val> - maximum number of transmit descriptors
 243 *                (packets) transmitted before interrupting the host.
 244 *
 245 *  max_rx_desc=<val> - maximum number of receive descriptors
 246 *                (packets) received before interrupting the host.
 247 *
  248 *  tx_ratio=<val> - value in the range 0 - 63, specifying in 1/64th
  249 *                increments how the NIC's on-board memory is split between
  250 *                transmit and receive buffers. On the 1MB NIC approximately
  251 *                800KB is available, on the 1/2MB NIC approximately 300KB
  252 *                is available. A minimum of 68KB will always be available
  253 *                for both directions. The default value is a 50/50 split.
 254 *  dis_pci_mem_inval=<val> - disable PCI memory write and invalidate
 255 *                operations, default (1) is to always disable this as
 256 *                that is what Alteon does on NT. I have not been able
 257 *                to measure any real performance differences with
 258 *                this on my systems. Set <val>=0 if you want to
 259 *                enable these operations.
 260 *
  261 * If you use more than one NIC, separate the parameters for the
  262 * individual NICs with commas, eg. trace=0,0x00001fff,0 if you want to
  263 * run tracing on NIC #2 but not on NIC #1 and #3.
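 *
 * As a worked example, the default link value 0x0270 given above is just
 * the OR of the individual link=<val> bits listed earlier:
 *
 *        0x0200 (RX flow control) | 0x0040 (1000Mbit/sec) |
 *        0x0020 (100Mbit/sec) | 0x0010 (10Mbit/sec) = 0x0270
 *
 * An illustrative module load line for a two NIC system (the values are
 * arbitrary examples, not recommendations) might then be:
 *
 *        modprobe acenic trace=0,0x00001fff link=0x0270,0x0270 tx_ratio=32,32
 *
 * ie. firmware tracing on NIC #2 only, default link negotiation on both
 * NICs, and an explicit 32/64 (50/50) buffer split on both.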
 264 *
 265 * TODO:
 266 *
 267 * - Proper multicast support.
 268 * - NIC dump support.
 269 * - More tuning parameters.
 270 *
 271 * The mini ring is not used under Linux and I am not sure it makes sense
 272 * to actually use it.
 273 *
 274 * New interrupt handler strategy:
 275 *
 276 * The old interrupt handler worked using the traditional method of
 277 * replacing an skbuff with a new one when a packet arrives. However
 278 * the rx rings do not need to contain a static number of buffer
 279 * descriptors, thus it makes sense to move the memory allocation out
 280 * of the main interrupt handler and do it in a bottom half handler
 281 * and only allocate new buffers when the number of buffers in the
 282 * ring is below a certain threshold. In order to avoid starving the
 283 * NIC under heavy load it is however necessary to force allocation
  284 * when hitting a minimum threshold. The strategy for allocation is as
 285 * follows:
 286 *
 287 *     RX_LOW_BUF_THRES    - allocate buffers in the bottom half
 288 *     RX_PANIC_LOW_THRES  - we are very low on buffers, allocate
 289 *                           the buffers in the interrupt handler
 290 *     RX_RING_THRES       - maximum number of buffers in the rx ring
 291 *     RX_MINI_THRES       - maximum number of buffers in the mini ring
 292 *     RX_JUMBO_THRES      - maximum number of buffers in the jumbo ring
 293 *
  294 * One advantageous side effect of this allocation approach is that the
 295 * entire rx processing can be done without holding any spin lock
 296 * since the rx rings and registers are totally independent of the tx
 297 * ring and its registers.  This of course includes the kmalloc's of
 298 * new skb's. Thus start_xmit can run in parallel with rx processing
 299 * and the memory allocation on SMP systems.
 300 *
 301 * Note that running the skb reallocation in a bottom half opens up
 302 * another can of races which needs to be handled properly. In
 303 * particular it can happen that the interrupt handler tries to run
 304 * the reallocation while the bottom half is either running on another
 305 * CPU or was interrupted on the same CPU. To get around this the
 306 * driver uses bitops to prevent the reallocation routines from being
 307 * reentered.
 308 *
  309 * TX handling can also be done without holding any spin lock - wheee,
  310 * this is fun! - since tx_ret_csm is only written to by the interrupt
 311 * handler. The case to be aware of is when shutting down the device
 312 * and cleaning up where it is necessary to make sure that
 313 * start_xmit() is not running while this is happening. Well DaveM
 314 * informs me that this case is already protected against ... bye bye
 315 * Mr. Spin Lock, it was nice to know you.
 316 *
 317 * TX interrupts are now partly disabled so the NIC will only generate
 318 * TX interrupts for the number of coal ticks, not for the number of
  319 * TX packets in the queue. This should reduce the number of TX-only
  320 * interrupts seen, ie. interrupts when no RX processing is done.
 321 */
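
/*
 * An illustration of the bitops guard described above, as it is used by
 * the refill paths later in this file (eg. ace_init() and ace_tasklet());
 * ap->std_refill_busy is the flag word protecting the standard ring:
 *
 *	if (!test_and_set_bit(0, &ap->std_refill_busy))
 *		ace_load_std_rx_ring(dev, nr_bufs);
 *
 * ace_load_std_rx_ring() releases the bit again with
 * clear_bit(0, &ap->std_refill_busy) once the ring has been refilled, so
 * a concurrent caller simply sees the bit set and backs off.
 */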
 322
 323/*
 324 * Threshold values for RX buffer allocation - the low water marks for
 325 * when to start refilling the rings are set to 75% of the ring
 326 * sizes. It seems to make sense to refill the rings entirely from the
  327 * interrupt handler once it gets below the panic threshold, that way
 328 * we don't risk that the refilling is moved to another CPU when the
 329 * one running the interrupt handler just got the slab code hot in its
 330 * cache.
 331 */
 332#define RX_RING_SIZE		72
 333#define RX_MINI_SIZE		64
 334#define RX_JUMBO_SIZE		48
 335
 336#define RX_PANIC_STD_THRES	16
 337#define RX_PANIC_STD_REFILL	(3*RX_PANIC_STD_THRES)/2
 338#define RX_LOW_STD_THRES	(3*RX_RING_SIZE)/4
 339#define RX_PANIC_MINI_THRES	12
 340#define RX_PANIC_MINI_REFILL	(3*RX_PANIC_MINI_THRES)/2
 341#define RX_LOW_MINI_THRES	(3*RX_MINI_SIZE)/4
 342#define RX_PANIC_JUMBO_THRES	6
 343#define RX_PANIC_JUMBO_REFILL	(3*RX_PANIC_JUMBO_THRES)/2
 344#define RX_LOW_JUMBO_THRES	(3*RX_JUMBO_SIZE)/4
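
/*
 * With the ring sizes above these thresholds work out to:
 *
 *	standard ring: low-water mark 54 (75% of 72), panic threshold 16,
 *	               panic refill of 24 buffers
 *	mini ring:     low-water mark 48, panic threshold 12, panic refill 18
 *	jumbo ring:    low-water mark 36, panic threshold 6, panic refill 9
 */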
 345
 346
 347/*
  348 * Size of the mini ring entries; basically they just need to be big
 349 * enough to take TCP ACKs
 350 */
 351#define ACE_MINI_SIZE		100
 352
 353#define ACE_MINI_BUFSIZE	ACE_MINI_SIZE
 354#define ACE_STD_BUFSIZE		(ACE_STD_MTU + ETH_HLEN + 4)
 355#define ACE_JUMBO_BUFSIZE	(ACE_JUMBO_MTU + ETH_HLEN + 4)
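
/*
 * Assuming ACE_STD_MTU is 1500 and ACE_JUMBO_MTU is 9000 (their usual
 * values in acenic.h), the above gives 1500 + 14 + 4 = 1518 byte buffers
 * for the standard ring and 9000 + 14 + 4 = 9018 byte buffers for the
 * jumbo ring, while the mini ring keeps the fixed 100 byte size.
 */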
 356
 357/*
 358 * There seems to be a magic difference in the effect between 995 and 996
 359 * but little difference between 900 and 995 ... no idea why.
 360 *
 361 * There is now a default set of tuning parameters which is set, depending
 362 * on whether or not the user enables Jumbo frames. It's assumed that if
 363 * Jumbo frames are enabled, the user wants optimal tuning for that case.
 364 */
 365#define DEF_TX_COAL		400 /* 996 */
 366#define DEF_TX_MAX_DESC		60  /* was 40 */
 367#define DEF_RX_COAL		120 /* 1000 */
 368#define DEF_RX_MAX_DESC		25
 369#define DEF_TX_RATIO		21 /* 24 */
 370
 371#define DEF_JUMBO_TX_COAL	20
 372#define DEF_JUMBO_TX_MAX_DESC	60
 373#define DEF_JUMBO_RX_COAL	30
 374#define DEF_JUMBO_RX_MAX_DESC	6
 375#define DEF_JUMBO_TX_RATIO	21
 376
 377#if tigon2FwReleaseLocal < 20001118
 378/*
 379 * Standard firmware and early modifications duplicate
 380 * IRQ load without this flag (coal timer is never reset).
 381 * Note that with this flag tx_coal should be less than
 382 * time to xmit full tx ring.
 383 * 400usec is not so bad for tx ring size of 128.
 384 */
 385#define TX_COAL_INTS_ONLY	1	/* worth it */
 386#else
 387/*
 388 * With modified firmware, this is not necessary, but still useful.
 389 */
 390#define TX_COAL_INTS_ONLY	1
 391#endif
 392
 393#define DEF_TRACE		0
 394#define DEF_STAT		(2 * TICKS_PER_SEC)
 395
 396
 397static int link_state[ACE_MAX_MOD_PARMS];
 398static int trace[ACE_MAX_MOD_PARMS];
 399static int tx_coal_tick[ACE_MAX_MOD_PARMS];
 400static int rx_coal_tick[ACE_MAX_MOD_PARMS];
 401static int max_tx_desc[ACE_MAX_MOD_PARMS];
 402static int max_rx_desc[ACE_MAX_MOD_PARMS];
 403static int tx_ratio[ACE_MAX_MOD_PARMS];
 404static int dis_pci_mem_inval[ACE_MAX_MOD_PARMS] = {1, 1, 1, 1, 1, 1, 1, 1};
 405
 406MODULE_AUTHOR("Jes Sorensen <jes@trained-monkey.org>");
 407MODULE_LICENSE("GPL");
 408MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver");
 409#ifndef CONFIG_ACENIC_OMIT_TIGON_I
 410MODULE_FIRMWARE("acenic/tg1.bin");
 411#endif
 412MODULE_FIRMWARE("acenic/tg2.bin");
 413
 414module_param_array_named(link, link_state, int, NULL, 0);
 415module_param_array(trace, int, NULL, 0);
 416module_param_array(tx_coal_tick, int, NULL, 0);
 417module_param_array(max_tx_desc, int, NULL, 0);
 418module_param_array(rx_coal_tick, int, NULL, 0);
 419module_param_array(max_rx_desc, int, NULL, 0);
 420module_param_array(tx_ratio, int, NULL, 0);
 421MODULE_PARM_DESC(link, "AceNIC/3C985/NetGear link state");
 422MODULE_PARM_DESC(trace, "AceNIC/3C985/NetGear firmware trace level");
 423MODULE_PARM_DESC(tx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first tx descriptor arrives");
 424MODULE_PARM_DESC(max_tx_desc, "AceNIC/3C985/GA620 max number of transmit descriptors to wait");
 425MODULE_PARM_DESC(rx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first rx descriptor arrives");
 426MODULE_PARM_DESC(max_rx_desc, "AceNIC/3C985/GA620 max number of receive descriptors to wait");
 427MODULE_PARM_DESC(tx_ratio, "AceNIC/3C985/GA620 ratio of NIC memory used for TX/RX descriptors (range 0-63)");
 428
 429
 430static const char version[] __devinitconst =
 431  "acenic.c: v0.92 08/05/2002  Jes Sorensen, linux-acenic@SunSITE.dk\n"
 432  "                            http://home.cern.ch/~jes/gige/acenic.html\n";
 433
 434static int ace_get_settings(struct net_device *, struct ethtool_cmd *);
 435static int ace_set_settings(struct net_device *, struct ethtool_cmd *);
 436static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 437
 438static const struct ethtool_ops ace_ethtool_ops = {
 439	.get_settings = ace_get_settings,
 440	.set_settings = ace_set_settings,
 441	.get_drvinfo = ace_get_drvinfo,
 442};
 443
 444static void ace_watchdog(struct net_device *dev);
 445
 446static const struct net_device_ops ace_netdev_ops = {
 447	.ndo_open		= ace_open,
 448	.ndo_stop		= ace_close,
 449	.ndo_tx_timeout		= ace_watchdog,
 450	.ndo_get_stats		= ace_get_stats,
 451	.ndo_start_xmit		= ace_start_xmit,
 452	.ndo_set_multicast_list	= ace_set_multicast_list,
 453	.ndo_validate_addr	= eth_validate_addr,
 454	.ndo_set_mac_address	= ace_set_mac_addr,
 455	.ndo_change_mtu		= ace_change_mtu,
 456};
 457
 458static int __devinit acenic_probe_one(struct pci_dev *pdev,
 459		const struct pci_device_id *id)
 460{
 461	struct net_device *dev;
 462	struct ace_private *ap;
 463	static int boards_found;
 464
 465	dev = alloc_etherdev(sizeof(struct ace_private));
 466	if (dev == NULL) {
 467		printk(KERN_ERR "acenic: Unable to allocate "
 468		       "net_device structure!\n");
 469		return -ENOMEM;
 470	}
 471
 472	SET_NETDEV_DEV(dev, &pdev->dev);
 473
 474	ap = netdev_priv(dev);
 475	ap->pdev = pdev;
 476	ap->name = pci_name(pdev);
 477
 478	dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
 479	dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
 480
 481	dev->watchdog_timeo = 5*HZ;
 482
 483	dev->netdev_ops = &ace_netdev_ops;
 484	SET_ETHTOOL_OPS(dev, &ace_ethtool_ops);
 485
 486	/* we only display this string ONCE */
 487	if (!boards_found)
 488		printk(version);
 489
 490	if (pci_enable_device(pdev))
 491		goto fail_free_netdev;
 492
 493	/*
 494	 * Enable master mode before we start playing with the
 495	 * pci_command word since pci_set_master() will modify
 496	 * it.
 497	 */
 498	pci_set_master(pdev);
 499
 500	pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command);
 501
  502	/* OpenFirmware on Macs does not set this - DOH.. */
 503	if (!(ap->pci_command & PCI_COMMAND_MEMORY)) {
 504		printk(KERN_INFO "%s: Enabling PCI Memory Mapped "
 505		       "access - was not enabled by BIOS/Firmware\n",
 506		       ap->name);
 507		ap->pci_command = ap->pci_command | PCI_COMMAND_MEMORY;
 508		pci_write_config_word(ap->pdev, PCI_COMMAND,
 509				      ap->pci_command);
 510		wmb();
 511	}
 512
 513	pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &ap->pci_latency);
 514	if (ap->pci_latency <= 0x40) {
 515		ap->pci_latency = 0x40;
 516		pci_write_config_byte(pdev, PCI_LATENCY_TIMER, ap->pci_latency);
 517	}
 518
 519	/*
 520	 * Remap the regs into kernel space - this is abuse of
  521	 * dev->base_addr since it was meant for I/O port
 522	 * addresses but who gives a damn.
 523	 */
 524	dev->base_addr = pci_resource_start(pdev, 0);
 525	ap->regs = ioremap(dev->base_addr, 0x4000);
 526	if (!ap->regs) {
 527		printk(KERN_ERR "%s:  Unable to map I/O register, "
 528		       "AceNIC %i will be disabled.\n",
 529		       ap->name, boards_found);
 530		goto fail_free_netdev;
 531	}
 532
 533	switch(pdev->vendor) {
 534	case PCI_VENDOR_ID_ALTEON:
 535		if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T) {
 536			printk(KERN_INFO "%s: Farallon PN9100-T ",
 537			       ap->name);
 538		} else {
 539			printk(KERN_INFO "%s: Alteon AceNIC ",
 540			       ap->name);
 541		}
 542		break;
 543	case PCI_VENDOR_ID_3COM:
 544		printk(KERN_INFO "%s: 3Com 3C985 ", ap->name);
 545		break;
 546	case PCI_VENDOR_ID_NETGEAR:
 547		printk(KERN_INFO "%s: NetGear GA620 ", ap->name);
 548		break;
 549	case PCI_VENDOR_ID_DEC:
 550		if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX) {
 551			printk(KERN_INFO "%s: Farallon PN9000-SX ",
 552			       ap->name);
 553			break;
 554		}
 555	case PCI_VENDOR_ID_SGI:
 556		printk(KERN_INFO "%s: SGI AceNIC ", ap->name);
 557		break;
 558	default:
 559		printk(KERN_INFO "%s: Unknown AceNIC ", ap->name);
 560		break;
 561	}
 562
 563	printk("Gigabit Ethernet at 0x%08lx, ", dev->base_addr);
 564	printk("irq %d\n", pdev->irq);
 565
 566#ifdef CONFIG_ACENIC_OMIT_TIGON_I
 567	if ((readl(&ap->regs->HostCtrl) >> 28) == 4) {
 568		printk(KERN_ERR "%s: Driver compiled without Tigon I"
 569		       " support - NIC disabled\n", dev->name);
 570		goto fail_uninit;
 571	}
 572#endif
 573
 574	if (ace_allocate_descriptors(dev))
 575		goto fail_free_netdev;
 576
 577#ifdef MODULE
 578	if (boards_found >= ACE_MAX_MOD_PARMS)
 579		ap->board_idx = BOARD_IDX_OVERFLOW;
 580	else
 581		ap->board_idx = boards_found;
 582#else
 583	ap->board_idx = BOARD_IDX_STATIC;
 584#endif
 585
 586	if (ace_init(dev))
 587		goto fail_free_netdev;
 588
 589	if (register_netdev(dev)) {
 590		printk(KERN_ERR "acenic: device registration failed\n");
 591		goto fail_uninit;
 592	}
 593	ap->name = dev->name;
 594
 595	if (ap->pci_using_dac)
 596		dev->features |= NETIF_F_HIGHDMA;
 597
 598	pci_set_drvdata(pdev, dev);
 599
 600	boards_found++;
 601	return 0;
 602
 603 fail_uninit:
 604	ace_init_cleanup(dev);
 605 fail_free_netdev:
 606	free_netdev(dev);
 607	return -ENODEV;
 608}
 609
 610static void __devexit acenic_remove_one(struct pci_dev *pdev)
 611{
 612	struct net_device *dev = pci_get_drvdata(pdev);
 613	struct ace_private *ap = netdev_priv(dev);
 614	struct ace_regs __iomem *regs = ap->regs;
 615	short i;
 616
 617	unregister_netdev(dev);
 618
 619	writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
 620	if (ap->version >= 2)
 621		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 622
 623	/*
 624	 * This clears any pending interrupts
 625	 */
 626	writel(1, &regs->Mb0Lo);
 627	readl(&regs->CpuCtrl);	/* flush */
 628
 629	/*
 630	 * Make sure no other CPUs are processing interrupts
 631	 * on the card before the buffers are being released.
 632	 * Otherwise one might experience some `interesting'
 633	 * effects.
 634	 *
 635	 * Then release the RX buffers - jumbo buffers were
 636	 * already released in ace_close().
 637	 */
 638	ace_sync_irq(dev->irq);
 639
 640	for (i = 0; i < RX_STD_RING_ENTRIES; i++) {
 641		struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb;
 642
 643		if (skb) {
 644			struct ring_info *ringp;
 645			dma_addr_t mapping;
 646
 647			ringp = &ap->skb->rx_std_skbuff[i];
 648			mapping = dma_unmap_addr(ringp, mapping);
 649			pci_unmap_page(ap->pdev, mapping,
 650				       ACE_STD_BUFSIZE,
 651				       PCI_DMA_FROMDEVICE);
 652
 653			ap->rx_std_ring[i].size = 0;
 654			ap->skb->rx_std_skbuff[i].skb = NULL;
 655			dev_kfree_skb(skb);
 656		}
 657	}
 658
 659	if (ap->version >= 2) {
 660		for (i = 0; i < RX_MINI_RING_ENTRIES; i++) {
 661			struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb;
 662
 663			if (skb) {
 664				struct ring_info *ringp;
 665				dma_addr_t mapping;
 666
 667				ringp = &ap->skb->rx_mini_skbuff[i];
 668				mapping = dma_unmap_addr(ringp,mapping);
 669				pci_unmap_page(ap->pdev, mapping,
 670					       ACE_MINI_BUFSIZE,
 671					       PCI_DMA_FROMDEVICE);
 672
 673				ap->rx_mini_ring[i].size = 0;
 674				ap->skb->rx_mini_skbuff[i].skb = NULL;
 675				dev_kfree_skb(skb);
 676			}
 677		}
 678	}
 679
 680	for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) {
 681		struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb;
 682		if (skb) {
 683			struct ring_info *ringp;
 684			dma_addr_t mapping;
 685
 686			ringp = &ap->skb->rx_jumbo_skbuff[i];
 687			mapping = dma_unmap_addr(ringp, mapping);
 688			pci_unmap_page(ap->pdev, mapping,
 689				       ACE_JUMBO_BUFSIZE,
 690				       PCI_DMA_FROMDEVICE);
 691
 692			ap->rx_jumbo_ring[i].size = 0;
 693			ap->skb->rx_jumbo_skbuff[i].skb = NULL;
 694			dev_kfree_skb(skb);
 695		}
 696	}
 697
 698	ace_init_cleanup(dev);
 699	free_netdev(dev);
 700}
 701
 702static struct pci_driver acenic_pci_driver = {
 703	.name		= "acenic",
 704	.id_table	= acenic_pci_tbl,
 705	.probe		= acenic_probe_one,
 706	.remove		= __devexit_p(acenic_remove_one),
 707};
 708
 709static int __init acenic_init(void)
 710{
 711	return pci_register_driver(&acenic_pci_driver);
 712}
 713
 714static void __exit acenic_exit(void)
 715{
 716	pci_unregister_driver(&acenic_pci_driver);
 717}
 718
 719module_init(acenic_init);
 720module_exit(acenic_exit);
 721
 722static void ace_free_descriptors(struct net_device *dev)
 723{
 724	struct ace_private *ap = netdev_priv(dev);
 725	int size;
 726
 727	if (ap->rx_std_ring != NULL) {
 728		size = (sizeof(struct rx_desc) *
 729			(RX_STD_RING_ENTRIES +
 730			 RX_JUMBO_RING_ENTRIES +
 731			 RX_MINI_RING_ENTRIES +
 732			 RX_RETURN_RING_ENTRIES));
 733		pci_free_consistent(ap->pdev, size, ap->rx_std_ring,
 734				    ap->rx_ring_base_dma);
 735		ap->rx_std_ring = NULL;
 736		ap->rx_jumbo_ring = NULL;
 737		ap->rx_mini_ring = NULL;
 738		ap->rx_return_ring = NULL;
 739	}
 740	if (ap->evt_ring != NULL) {
 741		size = (sizeof(struct event) * EVT_RING_ENTRIES);
 742		pci_free_consistent(ap->pdev, size, ap->evt_ring,
 743				    ap->evt_ring_dma);
 744		ap->evt_ring = NULL;
 745	}
 746	if (ap->tx_ring != NULL && !ACE_IS_TIGON_I(ap)) {
 747		size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES);
 748		pci_free_consistent(ap->pdev, size, ap->tx_ring,
 749				    ap->tx_ring_dma);
 750	}
 751	ap->tx_ring = NULL;
 752
 753	if (ap->evt_prd != NULL) {
 754		pci_free_consistent(ap->pdev, sizeof(u32),
 755				    (void *)ap->evt_prd, ap->evt_prd_dma);
 756		ap->evt_prd = NULL;
 757	}
 758	if (ap->rx_ret_prd != NULL) {
 759		pci_free_consistent(ap->pdev, sizeof(u32),
 760				    (void *)ap->rx_ret_prd,
 761				    ap->rx_ret_prd_dma);
 762		ap->rx_ret_prd = NULL;
 763	}
 764	if (ap->tx_csm != NULL) {
 765		pci_free_consistent(ap->pdev, sizeof(u32),
 766				    (void *)ap->tx_csm, ap->tx_csm_dma);
 767		ap->tx_csm = NULL;
 768	}
 769}
 770
 771
 772static int ace_allocate_descriptors(struct net_device *dev)
 773{
 774	struct ace_private *ap = netdev_priv(dev);
 775	int size;
 776
 777	size = (sizeof(struct rx_desc) *
 778		(RX_STD_RING_ENTRIES +
 779		 RX_JUMBO_RING_ENTRIES +
 780		 RX_MINI_RING_ENTRIES +
 781		 RX_RETURN_RING_ENTRIES));
 782
 783	ap->rx_std_ring = pci_alloc_consistent(ap->pdev, size,
 784					       &ap->rx_ring_base_dma);
 785	if (ap->rx_std_ring == NULL)
 786		goto fail;
 787
 788	ap->rx_jumbo_ring = ap->rx_std_ring + RX_STD_RING_ENTRIES;
 789	ap->rx_mini_ring = ap->rx_jumbo_ring + RX_JUMBO_RING_ENTRIES;
 790	ap->rx_return_ring = ap->rx_mini_ring + RX_MINI_RING_ENTRIES;
 791
 792	size = (sizeof(struct event) * EVT_RING_ENTRIES);
 793
 794	ap->evt_ring = pci_alloc_consistent(ap->pdev, size, &ap->evt_ring_dma);
 795
 796	if (ap->evt_ring == NULL)
 797		goto fail;
 798
 799	/*
 800	 * Only allocate a host TX ring for the Tigon II, the Tigon I
 801	 * has to use PCI registers for this ;-(
 802	 */
 803	if (!ACE_IS_TIGON_I(ap)) {
 804		size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES);
 805
 806		ap->tx_ring = pci_alloc_consistent(ap->pdev, size,
 807						   &ap->tx_ring_dma);
 808
 809		if (ap->tx_ring == NULL)
 810			goto fail;
 811	}
 812
 813	ap->evt_prd = pci_alloc_consistent(ap->pdev, sizeof(u32),
 814					   &ap->evt_prd_dma);
 815	if (ap->evt_prd == NULL)
 816		goto fail;
 817
 818	ap->rx_ret_prd = pci_alloc_consistent(ap->pdev, sizeof(u32),
 819					      &ap->rx_ret_prd_dma);
 820	if (ap->rx_ret_prd == NULL)
 821		goto fail;
 822
 823	ap->tx_csm = pci_alloc_consistent(ap->pdev, sizeof(u32),
 824					  &ap->tx_csm_dma);
 825	if (ap->tx_csm == NULL)
 826		goto fail;
 827
 828	return 0;
 829
 830fail:
 831	/* Clean up. */
 832	ace_init_cleanup(dev);
 833	return 1;
 834}
 835
 836
 837/*
 838 * Generic cleanup handling data allocated during init. Used when the
 839 * module is unloaded or if an error occurs during initialization
 840 */
 841static void ace_init_cleanup(struct net_device *dev)
 842{
 843	struct ace_private *ap;
 844
 845	ap = netdev_priv(dev);
 846
 847	ace_free_descriptors(dev);
 848
 849	if (ap->info)
 850		pci_free_consistent(ap->pdev, sizeof(struct ace_info),
 851				    ap->info, ap->info_dma);
 852	kfree(ap->skb);
 853	kfree(ap->trace_buf);
 854
 855	if (dev->irq)
 856		free_irq(dev->irq, dev);
 857
 858	iounmap(ap->regs);
 859}
 860
 861
 862/*
 863 * Commands are considered to be slow.
 864 */
 865static inline void ace_issue_cmd(struct ace_regs __iomem *regs, struct cmd *cmd)
 866{
 867	u32 idx;
 868
 869	idx = readl(&regs->CmdPrd);
 870
 871	writel(*(u32 *)(cmd), &regs->CmdRng[idx]);
 872	idx = (idx + 1) % CMD_RING_ENTRIES;
 873
 874	writel(idx, &regs->CmdPrd);
 875}
 876
 877
 878static int __devinit ace_init(struct net_device *dev)
 879{
 880	struct ace_private *ap;
 881	struct ace_regs __iomem *regs;
 882	struct ace_info *info = NULL;
 883	struct pci_dev *pdev;
 884	unsigned long myjif;
 885	u64 tmp_ptr;
 886	u32 tig_ver, mac1, mac2, tmp, pci_state;
 887	int board_idx, ecode = 0;
 888	short i;
 889	unsigned char cache_size;
 890
 891	ap = netdev_priv(dev);
 892	regs = ap->regs;
 893
 894	board_idx = ap->board_idx;
 895
 896	/*
  897	 * aman@sgi.com - it's useful to do a NIC reset here to
 898	 * address the `Firmware not running' problem subsequent
 899	 * to any crashes involving the NIC
 900	 */
 901	writel(HW_RESET | (HW_RESET << 24), &regs->HostCtrl);
 902	readl(&regs->HostCtrl);		/* PCI write posting */
 903	udelay(5);
 904
 905	/*
 906	 * Don't access any other registers before this point!
 907	 */
 908#ifdef __BIG_ENDIAN
 909	/*
 910	 * This will most likely need BYTE_SWAP once we switch
 911	 * to using __raw_writel()
 912	 */
 913	writel((WORD_SWAP | CLR_INT | ((WORD_SWAP | CLR_INT) << 24)),
 914	       &regs->HostCtrl);
 915#else
 916	writel((CLR_INT | WORD_SWAP | ((CLR_INT | WORD_SWAP) << 24)),
 917	       &regs->HostCtrl);
 918#endif
 919	readl(&regs->HostCtrl);		/* PCI write posting */
 920
 921	/*
 922	 * Stop the NIC CPU and clear pending interrupts
 923	 */
 924	writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
 925	readl(&regs->CpuCtrl);		/* PCI write posting */
 926	writel(0, &regs->Mb0Lo);
 927
 928	tig_ver = readl(&regs->HostCtrl) >> 28;
 929
 930	switch(tig_ver){
 931#ifndef CONFIG_ACENIC_OMIT_TIGON_I
 932	case 4:
 933	case 5:
 934		printk(KERN_INFO "  Tigon I  (Rev. %i), Firmware: %i.%i.%i, ",
 935		       tig_ver, ap->firmware_major, ap->firmware_minor,
 936		       ap->firmware_fix);
 937		writel(0, &regs->LocalCtrl);
 938		ap->version = 1;
 939		ap->tx_ring_entries = TIGON_I_TX_RING_ENTRIES;
 940		break;
 941#endif
 942	case 6:
 943		printk(KERN_INFO "  Tigon II (Rev. %i), Firmware: %i.%i.%i, ",
 944		       tig_ver, ap->firmware_major, ap->firmware_minor,
 945		       ap->firmware_fix);
 946		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 947		readl(&regs->CpuBCtrl);		/* PCI write posting */
 948		/*
 949		 * The SRAM bank size does _not_ indicate the amount
 950		 * of memory on the card, it controls the _bank_ size!
 951		 * Ie. a 1MB AceNIC will have two banks of 512KB.
 952		 */
 953		writel(SRAM_BANK_512K, &regs->LocalCtrl);
 954		writel(SYNC_SRAM_TIMING, &regs->MiscCfg);
 955		ap->version = 2;
 956		ap->tx_ring_entries = MAX_TX_RING_ENTRIES;
 957		break;
 958	default:
 959		printk(KERN_WARNING "  Unsupported Tigon version detected "
 960		       "(%i)\n", tig_ver);
 961		ecode = -ENODEV;
 962		goto init_error;
 963	}
 964
 965	/*
 966	 * ModeStat _must_ be set after the SRAM settings as this change
  967	 * seems to corrupt the ModeStat and possibly other registers.
 968	 * The SRAM settings survive resets and setting it to the same
 969	 * value a second time works as well. This is what caused the
 970	 * `Firmware not running' problem on the Tigon II.
 971	 */
 972#ifdef __BIG_ENDIAN
 973	writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL | ACE_BYTE_SWAP_BD |
 974	       ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
 975#else
 976	writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL |
 977	       ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
 978#endif
 979	readl(&regs->ModeStat);		/* PCI write posting */
 980
 981	mac1 = 0;
 982	for(i = 0; i < 4; i++) {
 983		int t;
 984
 985		mac1 = mac1 << 8;
 986		t = read_eeprom_byte(dev, 0x8c+i);
 987		if (t < 0) {
 988			ecode = -EIO;
 989			goto init_error;
 990		} else
 991			mac1 |= (t & 0xff);
 992	}
 993	mac2 = 0;
 994	for(i = 4; i < 8; i++) {
 995		int t;
 996
 997		mac2 = mac2 << 8;
 998		t = read_eeprom_byte(dev, 0x8c+i);
 999		if (t < 0) {
1000			ecode = -EIO;
1001			goto init_error;
1002		} else
1003			mac2 |= (t & 0xff);
1004	}
1005
1006	writel(mac1, &regs->MacAddrHi);
1007	writel(mac2, &regs->MacAddrLo);
1008
1009	dev->dev_addr[0] = (mac1 >> 8) & 0xff;
1010	dev->dev_addr[1] = mac1 & 0xff;
1011	dev->dev_addr[2] = (mac2 >> 24) & 0xff;
1012	dev->dev_addr[3] = (mac2 >> 16) & 0xff;
1013	dev->dev_addr[4] = (mac2 >> 8) & 0xff;
1014	dev->dev_addr[5] = mac2 & 0xff;
1015
1016	printk("MAC: %pM\n", dev->dev_addr);
1017
1018	/*
 1019	 * Looks like we need to deal with this on all architectures -
 1020	 * even this %$#%$# N440BX Intel based thing doesn't get it right.
 1021	 * Ie. with two NICs in the machine, one will have the cache line
 1022	 * size set at boot time and the other will not.
1023	 */
1024	pdev = ap->pdev;
1025	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_size);
1026	cache_size <<= 2;
1027	if (cache_size != SMP_CACHE_BYTES) {
1028		printk(KERN_INFO "  PCI cache line size set incorrectly "
1029		       "(%i bytes) by BIOS/FW, ", cache_size);
1030		if (cache_size > SMP_CACHE_BYTES)
1031			printk("expecting %i\n", SMP_CACHE_BYTES);
1032		else {
1033			printk("correcting to %i\n", SMP_CACHE_BYTES);
1034			pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE,
1035					      SMP_CACHE_BYTES >> 2);
1036		}
1037	}
1038
1039	pci_state = readl(&regs->PciState);
1040	printk(KERN_INFO "  PCI bus width: %i bits, speed: %iMHz, "
1041	       "latency: %i clks\n",
1042	       	(pci_state & PCI_32BIT) ? 32 : 64,
1043		(pci_state & PCI_66MHZ) ? 66 : 33,
1044		ap->pci_latency);
1045
1046	/*
1047	 * Set the max DMA transfer size. Seems that for most systems
1048	 * the performance is better when no MAX parameter is
1049	 * set. However for systems enabling PCI write and invalidate,
1050	 * DMA writes must be set to the L1 cache line size to get
1051	 * optimal performance.
1052	 *
1053	 * The default is now to turn the PCI write and invalidate off
1054	 * - that is what Alteon does for NT.
1055	 */
1056	tmp = READ_CMD_MEM | WRITE_CMD_MEM;
1057	if (ap->version >= 2) {
1058		tmp |= (MEM_READ_MULTIPLE | (pci_state & PCI_66MHZ));
1059		/*
1060		 * Tuning parameters only supported for 8 cards
1061		 */
1062		if (board_idx == BOARD_IDX_OVERFLOW ||
1063		    dis_pci_mem_inval[board_idx]) {
1064			if (ap->pci_command & PCI_COMMAND_INVALIDATE) {
1065				ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
1066				pci_write_config_word(pdev, PCI_COMMAND,
1067						      ap->pci_command);
1068				printk(KERN_INFO "  Disabling PCI memory "
1069				       "write and invalidate\n");
1070			}
1071		} else if (ap->pci_command & PCI_COMMAND_INVALIDATE) {
1072			printk(KERN_INFO "  PCI memory write & invalidate "
1073			       "enabled by BIOS, enabling counter measures\n");
1074
1075			switch(SMP_CACHE_BYTES) {
1076			case 16:
1077				tmp |= DMA_WRITE_MAX_16;
1078				break;
1079			case 32:
1080				tmp |= DMA_WRITE_MAX_32;
1081				break;
1082			case 64:
1083				tmp |= DMA_WRITE_MAX_64;
1084				break;
1085			case 128:
1086				tmp |= DMA_WRITE_MAX_128;
1087				break;
1088			default:
1089				printk(KERN_INFO "  Cache line size %i not "
1090				       "supported, PCI write and invalidate "
1091				       "disabled\n", SMP_CACHE_BYTES);
1092				ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
1093				pci_write_config_word(pdev, PCI_COMMAND,
1094						      ap->pci_command);
1095			}
1096		}
1097	}
1098
1099#ifdef __sparc__
1100	/*
1101	 * On this platform, we know what the best dma settings
1102	 * are.  We use 64-byte maximum bursts, because if we
1103	 * burst larger than the cache line size (or even cross
1104	 * a 64byte boundary in a single burst) the UltraSparc
1105	 * PCI controller will disconnect at 64-byte multiples.
1106	 *
1107	 * Read-multiple will be properly enabled above, and when
1108	 * set will give the PCI controller proper hints about
1109	 * prefetching.
1110	 */
1111	tmp &= ~DMA_READ_WRITE_MASK;
1112	tmp |= DMA_READ_MAX_64;
1113	tmp |= DMA_WRITE_MAX_64;
1114#endif
1115#ifdef __alpha__
1116	tmp &= ~DMA_READ_WRITE_MASK;
1117	tmp |= DMA_READ_MAX_128;
1118	/*
1119	 * All the docs say MUST NOT. Well, I did.
 1120	 * Nothing terrible happens if we load the wrong size.
1121	 * Bit w&i still works better!
1122	 */
1123	tmp |= DMA_WRITE_MAX_128;
1124#endif
1125	writel(tmp, &regs->PciState);
1126
1127#if 0
1128	/*
1129	 * The Host PCI bus controller driver has to set FBB.
1130	 * If all devices on that PCI bus support FBB, then the controller
1131	 * can enable FBB support in the Host PCI Bus controller (or on
1132	 * the PCI-PCI bridge if that applies).
1133	 * -ggg
1134	 */
1135	/*
1136	 * I have received reports from people having problems when this
1137	 * bit is enabled.
1138	 */
1139	if (!(ap->pci_command & PCI_COMMAND_FAST_BACK)) {
1140		printk(KERN_INFO "  Enabling PCI Fast Back to Back\n");
1141		ap->pci_command |= PCI_COMMAND_FAST_BACK;
1142		pci_write_config_word(pdev, PCI_COMMAND, ap->pci_command);
1143	}
1144#endif
1145
1146	/*
1147	 * Configure DMA attributes.
1148	 */
1149	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
1150		ap->pci_using_dac = 1;
1151	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
1152		ap->pci_using_dac = 0;
1153	} else {
1154		ecode = -ENODEV;
1155		goto init_error;
1156	}
1157
1158	/*
1159	 * Initialize the generic info block and the command+event rings
1160	 * and the control blocks for the transmit and receive rings
1161	 * as they need to be setup once and for all.
1162	 */
1163	if (!(info = pci_alloc_consistent(ap->pdev, sizeof(struct ace_info),
1164					  &ap->info_dma))) {
1165		ecode = -EAGAIN;
1166		goto init_error;
1167	}
1168	ap->info = info;
1169
1170	/*
1171	 * Get the memory for the skb rings.
1172	 */
1173	if (!(ap->skb = kmalloc(sizeof(struct ace_skb), GFP_KERNEL))) {
1174		ecode = -EAGAIN;
1175		goto init_error;
1176	}
1177
1178	ecode = request_irq(pdev->irq, ace_interrupt, IRQF_SHARED,
1179			    DRV_NAME, dev);
1180	if (ecode) {
1181		printk(KERN_WARNING "%s: Requested IRQ %d is busy\n",
1182		       DRV_NAME, pdev->irq);
1183		goto init_error;
1184	} else
1185		dev->irq = pdev->irq;
1186
1187#ifdef INDEX_DEBUG
1188	spin_lock_init(&ap->debug_lock);
1189	ap->last_tx = ACE_TX_RING_ENTRIES(ap) - 1;
1190	ap->last_std_rx = 0;
1191	ap->last_mini_rx = 0;
1192#endif
1193
1194	memset(ap->info, 0, sizeof(struct ace_info));
1195	memset(ap->skb, 0, sizeof(struct ace_skb));
1196
1197	ecode = ace_load_firmware(dev);
1198	if (ecode)
1199		goto init_error;
1200
1201	ap->fw_running = 0;
1202
1203	tmp_ptr = ap->info_dma;
1204	writel(tmp_ptr >> 32, &regs->InfoPtrHi);
1205	writel(tmp_ptr & 0xffffffff, &regs->InfoPtrLo);
1206
1207	memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event));
1208
1209	set_aceaddr(&info->evt_ctrl.rngptr, ap->evt_ring_dma);
1210	info->evt_ctrl.flags = 0;
1211
1212	*(ap->evt_prd) = 0;
1213	wmb();
1214	set_aceaddr(&info->evt_prd_ptr, ap->evt_prd_dma);
1215	writel(0, &regs->EvtCsm);
1216
1217	set_aceaddr(&info->cmd_ctrl.rngptr, 0x100);
1218	info->cmd_ctrl.flags = 0;
1219	info->cmd_ctrl.max_len = 0;
1220
1221	for (i = 0; i < CMD_RING_ENTRIES; i++)
1222		writel(0, &regs->CmdRng[i]);
1223
1224	writel(0, &regs->CmdPrd);
1225	writel(0, &regs->CmdCsm);
1226
1227	tmp_ptr = ap->info_dma;
1228	tmp_ptr += (unsigned long) &(((struct ace_info *)0)->s.stats);
1229	set_aceaddr(&info->stats2_ptr, (dma_addr_t) tmp_ptr);
1230
1231	set_aceaddr(&info->rx_std_ctrl.rngptr, ap->rx_ring_base_dma);
1232	info->rx_std_ctrl.max_len = ACE_STD_BUFSIZE;
1233	info->rx_std_ctrl.flags =
1234	  RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1235
1236	memset(ap->rx_std_ring, 0,
1237	       RX_STD_RING_ENTRIES * sizeof(struct rx_desc));
1238
1239	for (i = 0; i < RX_STD_RING_ENTRIES; i++)
1240		ap->rx_std_ring[i].flags = BD_FLG_TCP_UDP_SUM;
1241
1242	ap->rx_std_skbprd = 0;
1243	atomic_set(&ap->cur_rx_bufs, 0);
1244
1245	set_aceaddr(&info->rx_jumbo_ctrl.rngptr,
1246		    (ap->rx_ring_base_dma +
1247		     (sizeof(struct rx_desc) * RX_STD_RING_ENTRIES)));
1248	info->rx_jumbo_ctrl.max_len = 0;
1249	info->rx_jumbo_ctrl.flags =
1250	  RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1251
1252	memset(ap->rx_jumbo_ring, 0,
1253	       RX_JUMBO_RING_ENTRIES * sizeof(struct rx_desc));
1254
1255	for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++)
1256		ap->rx_jumbo_ring[i].flags = BD_FLG_TCP_UDP_SUM | BD_FLG_JUMBO;
1257
1258	ap->rx_jumbo_skbprd = 0;
1259	atomic_set(&ap->cur_jumbo_bufs, 0);
1260
1261	memset(ap->rx_mini_ring, 0,
1262	       RX_MINI_RING_ENTRIES * sizeof(struct rx_desc));
1263
1264	if (ap->version >= 2) {
1265		set_aceaddr(&info->rx_mini_ctrl.rngptr,
1266			    (ap->rx_ring_base_dma +
1267			     (sizeof(struct rx_desc) *
1268			      (RX_STD_RING_ENTRIES +
1269			       RX_JUMBO_RING_ENTRIES))));
1270		info->rx_mini_ctrl.max_len = ACE_MINI_SIZE;
1271		info->rx_mini_ctrl.flags =
1272		  RCB_FLG_TCP_UDP_SUM|RCB_FLG_NO_PSEUDO_HDR|RCB_FLG_VLAN_ASSIST;
1273
1274		for (i = 0; i < RX_MINI_RING_ENTRIES; i++)
1275			ap->rx_mini_ring[i].flags =
1276				BD_FLG_TCP_UDP_SUM | BD_FLG_MINI;
1277	} else {
1278		set_aceaddr(&info->rx_mini_ctrl.rngptr, 0);
1279		info->rx_mini_ctrl.flags = RCB_FLG_RNG_DISABLE;
1280		info->rx_mini_ctrl.max_len = 0;
1281	}
1282
1283	ap->rx_mini_skbprd = 0;
1284	atomic_set(&ap->cur_mini_bufs, 0);
1285
1286	set_aceaddr(&info->rx_return_ctrl.rngptr,
1287		    (ap->rx_ring_base_dma +
1288		     (sizeof(struct rx_desc) *
1289		      (RX_STD_RING_ENTRIES +
1290		       RX_JUMBO_RING_ENTRIES +
1291		       RX_MINI_RING_ENTRIES))));
1292	info->rx_return_ctrl.flags = 0;
1293	info->rx_return_ctrl.max_len = RX_RETURN_RING_ENTRIES;
1294
1295	memset(ap->rx_return_ring, 0,
1296	       RX_RETURN_RING_ENTRIES * sizeof(struct rx_desc));
1297
1298	set_aceaddr(&info->rx_ret_prd_ptr, ap->rx_ret_prd_dma);
1299	*(ap->rx_ret_prd) = 0;
1300
1301	writel(TX_RING_BASE, &regs->WinBase);
1302
1303	if (ACE_IS_TIGON_I(ap)) {
1304		ap->tx_ring = (__force struct tx_desc *) regs->Window;
1305		for (i = 0; i < (TIGON_I_TX_RING_ENTRIES
1306				 * sizeof(struct tx_desc)) / sizeof(u32); i++)
1307			writel(0, (__force void __iomem *)ap->tx_ring  + i * 4);
1308
1309		set_aceaddr(&info->tx_ctrl.rngptr, TX_RING_BASE);
1310	} else {
1311		memset(ap->tx_ring, 0,
1312		       MAX_TX_RING_ENTRIES * sizeof(struct tx_desc));
1313
1314		set_aceaddr(&info->tx_ctrl.rngptr, ap->tx_ring_dma);
1315	}
1316
1317	info->tx_ctrl.max_len = ACE_TX_RING_ENTRIES(ap);
1318	tmp = RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1319
1320	/*
1321	 * The Tigon I does not like having the TX ring in host memory ;-(
1322	 */
1323	if (!ACE_IS_TIGON_I(ap))
1324		tmp |= RCB_FLG_TX_HOST_RING;
1325#if TX_COAL_INTS_ONLY
1326	tmp |= RCB_FLG_COAL_INT_ONLY;
1327#endif
1328	info->tx_ctrl.flags = tmp;
1329
1330	set_aceaddr(&info->tx_csm_ptr, ap->tx_csm_dma);
1331
1332	/*
1333	 * Potential item for tuning parameter
1334	 */
1335#if 0 /* NO */
1336	writel(DMA_THRESH_16W, &regs->DmaReadCfg);
1337	writel(DMA_THRESH_16W, &regs->DmaWriteCfg);
1338#else
1339	writel(DMA_THRESH_8W, &regs->DmaReadCfg);
1340	writel(DMA_THRESH_8W, &regs->DmaWriteCfg);
1341#endif
1342
1343	writel(0, &regs->MaskInt);
1344	writel(1, &regs->IfIdx);
1345#if 0
1346	/*
1347	 * McKinley boxes do not like us fiddling with AssistState
1348	 * this early
1349	 */
1350	writel(1, &regs->AssistState);
1351#endif
1352
1353	writel(DEF_STAT, &regs->TuneStatTicks);
1354	writel(DEF_TRACE, &regs->TuneTrace);
1355
1356	ace_set_rxtx_parms(dev, 0);
1357
1358	if (board_idx == BOARD_IDX_OVERFLOW) {
1359		printk(KERN_WARNING "%s: more than %i NICs detected, "
1360		       "ignoring module parameters!\n",
1361		       ap->name, ACE_MAX_MOD_PARMS);
1362	} else if (board_idx >= 0) {
1363		if (tx_coal_tick[board_idx])
1364			writel(tx_coal_tick[board_idx],
1365			       &regs->TuneTxCoalTicks);
1366		if (max_tx_desc[board_idx])
1367			writel(max_tx_desc[board_idx], &regs->TuneMaxTxDesc);
1368
1369		if (rx_coal_tick[board_idx])
1370			writel(rx_coal_tick[board_idx],
1371			       &regs->TuneRxCoalTicks);
1372		if (max_rx_desc[board_idx])
1373			writel(max_rx_desc[board_idx], &regs->TuneMaxRxDesc);
1374
1375		if (trace[board_idx])
1376			writel(trace[board_idx], &regs->TuneTrace);
1377
1378		if ((tx_ratio[board_idx] > 0) && (tx_ratio[board_idx] < 64))
1379			writel(tx_ratio[board_idx], &regs->TxBufRat);
1380	}
1381
1382	/*
1383	 * Default link parameters
1384	 */
1385	tmp = LNK_ENABLE | LNK_FULL_DUPLEX | LNK_1000MB | LNK_100MB |
1386		LNK_10MB | LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL | LNK_NEGOTIATE;
1387	if(ap->version >= 2)
1388		tmp |= LNK_TX_FLOW_CTL_Y;
1389
1390	/*
1391	 * Override link default parameters
1392	 */
1393	if ((board_idx >= 0) && link_state[board_idx]) {
1394		int option = link_state[board_idx];
1395
1396		tmp = LNK_ENABLE;
1397
1398		if (option & 0x01) {
1399			printk(KERN_INFO "%s: Setting half duplex link\n",
1400			       ap->name);
1401			tmp &= ~LNK_FULL_DUPLEX;
1402		}
1403		if (option & 0x02)
1404			tmp &= ~LNK_NEGOTIATE;
1405		if (option & 0x10)
1406			tmp |= LNK_10MB;
1407		if (option & 0x20)
1408			tmp |= LNK_100MB;
1409		if (option & 0x40)
1410			tmp |= LNK_1000MB;
1411		if ((option & 0x70) == 0) {
1412			printk(KERN_WARNING "%s: No media speed specified, "
1413			       "forcing auto negotiation\n", ap->name);
1414			tmp |= LNK_NEGOTIATE | LNK_1000MB |
1415				LNK_100MB | LNK_10MB;
1416		}
1417		if ((option & 0x100) == 0)
1418			tmp |= LNK_NEG_FCTL;
1419		else
1420			printk(KERN_INFO "%s: Disabling flow control "
1421			       "negotiation\n", ap->name);
1422		if (option & 0x200)
1423			tmp |= LNK_RX_FLOW_CTL_Y;
1424		if ((option & 0x400) && (ap->version >= 2)) {
1425			printk(KERN_INFO "%s: Enabling TX flow control\n",
1426			       ap->name);
1427			tmp |= LNK_TX_FLOW_CTL_Y;
1428		}
1429	}
1430
1431	ap->link = tmp;
1432	writel(tmp, &regs->TuneLink);
1433	if (ap->version >= 2)
1434		writel(tmp, &regs->TuneFastLink);
1435
1436	writel(ap->firmware_start, &regs->Pc);
1437
1438	writel(0, &regs->Mb0Lo);
1439
1440	/*
1441	 * Set tx_csm before we start receiving interrupts, otherwise
1442	 * the interrupt handler might think it is supposed to process
1443	 * tx ints before we are up and running, which may cause a null
1444	 * pointer access in the int handler.
1445	 */
1446	ap->cur_rx = 0;
1447	ap->tx_prd = *(ap->tx_csm) = ap->tx_ret_csm = 0;
1448
1449	wmb();
1450	ace_set_txprd(regs, ap, 0);
1451	writel(0, &regs->RxRetCsm);
1452
1453       /*
1454	* Enable DMA engine now.
 1455	* If we do this sooner, the McKinley box pukes.
 1456	* I assume it's because the Tigon II DMA engine wants to check
1457	* *something* even before the CPU is started.
1458	*/
1459       writel(1, &regs->AssistState);  /* enable DMA */
1460
1461	/*
1462	 * Start the NIC CPU
1463	 */
1464	writel(readl(&regs->CpuCtrl) & ~(CPU_HALT|CPU_TRACE), &regs->CpuCtrl);
1465	readl(&regs->CpuCtrl);
1466
1467	/*
1468	 * Wait for the firmware to spin up - max 3 seconds.
1469	 */
1470	myjif = jiffies + 3 * HZ;
1471	while (time_before(jiffies, myjif) && !ap->fw_running)
1472		cpu_relax();
1473
1474	if (!ap->fw_running) {
1475		printk(KERN_ERR "%s: Firmware NOT running!\n", ap->name);
1476
1477		ace_dump_trace(ap);
1478		writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
1479		readl(&regs->CpuCtrl);
1480
1481		/* aman@sgi.com - account for badly behaving firmware/NIC:
1482		 * - have observed that the NIC may continue to generate
1483		 *   interrupts for some reason; attempt to stop it - halt
1484		 *   second CPU for Tigon II cards, and also clear Mb0
1485		 * - if we're a module, we'll fail to load if this was
1486		 *   the only GbE card in the system => if the kernel does
1487		 *   see an interrupt from the NIC, code to handle it is
1488		 *   gone and OOps! - so free_irq also
1489		 */
1490		if (ap->version >= 2)
1491			writel(readl(&regs->CpuBCtrl) | CPU_HALT,
1492			       &regs->CpuBCtrl);
1493		writel(0, &regs->Mb0Lo);
1494		readl(&regs->Mb0Lo);
1495
1496		ecode = -EBUSY;
1497		goto init_error;
1498	}
1499
1500	/*
 1501	 * We load the ring here as there seems to be no way to tell the
1502	 * firmware to wipe the ring without re-initializing it.
1503	 */
1504	if (!test_and_set_bit(0, &ap->std_refill_busy))
1505		ace_load_std_rx_ring(dev, RX_RING_SIZE);
1506	else
1507		printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n",
1508		       ap->name);
1509	if (ap->version >= 2) {
1510		if (!test_and_set_bit(0, &ap->mini_refill_busy))
1511			ace_load_mini_rx_ring(dev, RX_MINI_SIZE);
1512		else
1513			printk(KERN_ERR "%s: Someone is busy refilling "
1514			       "the RX mini ring\n", ap->name);
1515	}
1516	return 0;
1517
1518 init_error:
1519	ace_init_cleanup(dev);
1520	return ecode;
1521}
1522
1523
1524static void ace_set_rxtx_parms(struct net_device *dev, int jumbo)
1525{
1526	struct ace_private *ap = netdev_priv(dev);
1527	struct ace_regs __iomem *regs = ap->regs;
1528	int board_idx = ap->board_idx;
1529
1530	if (board_idx >= 0) {
1531		if (!jumbo) {
1532			if (!tx_coal_tick[board_idx])
1533				writel(DEF_TX_COAL, &regs->TuneTxCoalTicks);
1534			if (!max_tx_desc[board_idx])
1535				writel(DEF_TX_MAX_DESC, &regs->TuneMaxTxDesc);
1536			if (!rx_coal_tick[board_idx])
1537				writel(DEF_RX_COAL, &regs->TuneRxCoalTicks);
1538			if (!max_rx_desc[board_idx])
1539				writel(DEF_RX_MAX_DESC, &regs->TuneMaxRxDesc);
1540			if (!tx_ratio[board_idx])
1541				writel(DEF_TX_RATIO, &regs->TxBufRat);
1542		} else {
1543			if (!tx_coal_tick[board_idx])
1544				writel(DEF_JUMBO_TX_COAL,
1545				       &regs->TuneTxCoalTicks);
1546			if (!max_tx_desc[board_idx])
1547				writel(DEF_JUMBO_TX_MAX_DESC,
1548				       &regs->TuneMaxTxDesc);
1549			if (!rx_coal_tick[board_idx])
1550				writel(DEF_JUMBO_RX_COAL,
1551				       &regs->TuneRxCoalTicks);
1552			if (!max_rx_desc[board_idx])
1553				writel(DEF_JUMBO_RX_MAX_DESC,
1554				       &regs->TuneMaxRxDesc);
1555			if (!tx_ratio[board_idx])
1556				writel(DEF_JUMBO_TX_RATIO, &regs->TxBufRat);
1557		}
1558	}
1559}
1560
1561
1562static void ace_watchdog(struct net_device *data)
1563{
1564	struct net_device *dev = data;
1565	struct ace_private *ap = netdev_priv(dev);
1566	struct ace_regs __iomem *regs = ap->regs;
1567
1568	/*
1569	 * We haven't received a stats update event for more than 2.5
1570	 * seconds and there is data in the transmit queue, thus we
1571	 * assume the card is stuck.
1572	 */
1573	if (*ap->tx_csm != ap->tx_ret_csm) {
1574		printk(KERN_WARNING "%s: Transmitter is stuck, %08x\n",
1575		       dev->name, (unsigned int)readl(&regs->HostCtrl));
1576		/* This can happen due to ieee flow control. */
1577	} else {
1578		printk(KERN_DEBUG "%s: BUG... transmitter died. Kicking it.\n",
1579		       dev->name);
1580#if 0
1581		netif_wake_queue(dev);
1582#endif
1583	}
1584}
1585
1586
1587static void ace_tasklet(unsigned long arg)
1588{
1589	struct net_device *dev = (struct net_device *) arg;
1590	struct ace_private *ap = netdev_priv(dev);
1591	int cur_size;
1592
1593	cur_size = atomic_read(&ap->cur_rx_bufs);
1594	if ((cur_size < RX_LOW_STD_THRES) &&
1595	    !test_and_set_bit(0, &ap->std_refill_busy)) {
1596#ifdef DEBUG
1597		printk("refilling buffers (current %i)\n", cur_size);
1598#endif
1599		ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size);
1600	}
1601
1602	if (ap->version >= 2) {
1603		cur_size = atomic_read(&ap->cur_mini_bufs);
1604		if ((cur_size < RX_LOW_MINI_THRES) &&
1605		    !test_and_set_bit(0, &ap->mini_refill_busy)) {
1606#ifdef DEBUG
1607			printk("refilling mini buffers (current %i)\n",
1608			       cur_size);
1609#endif
1610			ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size);
1611		}
1612	}
1613
1614	cur_size = atomic_read(&ap->cur_jumbo_bufs);
1615	if (ap->jumbo && (cur_size < RX_LOW_JUMBO_THRES) &&
1616	    !test_and_set_bit(0, &ap->jumbo_refill_busy)) {
1617#ifdef DEBUG
1618		printk("refilling jumbo buffers (current %i)\n", cur_size);
1619#endif
1620		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size);
1621	}
1622	ap->tasklet_pending = 0;
1623}
1624
1625
1626/*
1627 * Copy the contents of the NIC's trace buffer to kernel memory.
1628 */
1629static void ace_dump_trace(struct ace_private *ap)
1630{
1631#if 0
1632	if (!ap->trace_buf)
1633		if (!(ap->trace_buf = kmalloc(ACE_TRACE_SIZE, GFP_KERNEL)))
1634		    return;
1635#endif
1636}
1637
1638
1639/*
1640 * Load the standard rx ring.
1641 *
 1642 * Loading rings is safe without holding the spin lock since it is done
 1643 * either before the device is enabled (so no interrupts are generated)
 1644 * or from within the interrupt handler/tasklet handler itself.
1645 */
1646static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs)
1647{
1648	struct ace_private *ap = netdev_priv(dev);
1649	struct ace_regs __iomem *regs = ap->regs;
1650	short i, idx;
1651
1652
1653	prefetchw(&ap->cur_rx_bufs);
1654
1655	idx = ap->rx_std_skbprd;
1656
1657	for (i = 0; i < nr_bufs; i++) {
1658		struct sk_buff *skb;
1659		struct rx_desc *rd;
1660		dma_addr_t mapping;
1661
1662		skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE);
1663		if (!skb)
1664			break;
1665
1666		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
1667				       offset_in_page(skb->data),
1668				       ACE_STD_BUFSIZE,
1669				       PCI_DMA_FROMDEVICE);
1670		ap->skb->rx_std_skbuff[idx].skb = skb;
1671		dma_unmap_addr_set(&ap->skb->rx_std_skbuff[idx],
1672				   mapping, mapping);
1673
1674		rd = &ap->rx_std_ring[idx];
1675		set_aceaddr(&rd->addr, mapping);
1676		rd->size = ACE_STD_BUFSIZE;
1677		rd->idx = idx;
1678		idx = (idx + 1) % RX_STD_RING_ENTRIES;
1679	}
1680
1681	if (!i)
1682		goto error_out;
1683
1684	atomic_add(i, &ap->cur_rx_bufs);
1685	ap->rx_std_skbprd = idx;
1686
1687	if (ACE_IS_TIGON_I(ap)) {
1688		struct cmd cmd;
1689		cmd.evt = C_SET_RX_PRD_IDX;
1690		cmd.code = 0;
1691		cmd.idx = ap->rx_std_skbprd;
1692		ace_issue_cmd(regs, &cmd);
1693	} else {
1694		writel(idx, &regs->RxStdPrd);
1695		wmb();
1696	}
1697
1698 out:
1699	clear_bit(0, &ap->std_refill_busy);
1700	return;
1701
1702 error_out:
1703	printk(KERN_INFO "Out of memory when allocating "
1704	       "standard receive buffers\n");
1705	goto out;
1706}
1707
1708
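/*
 * Load the mini rx ring; this ring only exists on Tigon II NICs
 * (ap->version >= 2).
 */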
1709static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs)
1710{
1711	struct ace_private *ap = netdev_priv(dev);
1712	struct ace_regs __iomem *regs = ap->regs;
1713	short i, idx;
1714
1715	prefetchw(&ap->cur_mini_bufs);
1716
1717	idx = ap->rx_mini_skbprd;
1718	for (i = 0; i < nr_bufs; i++) {
1719		struct sk_buff *skb;
1720		struct rx_desc *rd;
1721		dma_addr_t mapping;
1722
1723		skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE);
1724		if (!skb)
1725			break;
1726
1727		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
1728				       offset_in_page(skb->data),
1729				       ACE_MINI_BUFSIZE,
1730				       PCI_DMA_FROMDEVICE);
1731		ap->skb->rx_mini_skbuff[idx].skb = skb;
1732		dma_unmap_addr_set(&ap->skb->rx_mini_skbuff[idx],
1733				   mapping, mapping);
1734
1735		rd = &ap->rx_mini_ring[idx];
1736		set_aceaddr(&rd->addr, mapping);
1737		rd->size = ACE_MINI_BUFSIZE;
1738		rd->idx = idx;
1739		idx = (idx + 1) % RX_MINI_RING_ENTRIES;
1740	}
1741
1742	if (!i)
1743		goto error_out;
1744
1745	atomic_add(i, &ap->cur_mini_bufs);
1746
1747	ap->rx_mini_skbprd = idx;
1748
1749	writel(idx, &regs->RxMiniPrd);
1750	wmb();
1751
1752 out:
1753	clear_bit(0, &ap->mini_refill_busy);
1754	return;
1755 error_out:
1756	printk(KERN_INFO "Out of memory when allocating "
1757	       "mini receive buffers\n");
1758	goto out;
1759}
1760
1761
1762/*
1763 * Load the jumbo rx ring; this may happen at any time if the MTU
1764 * is changed to a value > 1500.
1765 */
1766static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs)
1767{
1768	struct ace_private *ap = netdev_priv(dev);
1769	struct ace_regs __iomem *regs = ap->regs;
1770	short i, idx;
1771
1772	idx = ap->rx_jumbo_skbprd;
1773
1774	for (i = 0; i < nr_bufs; i++) {
1775		struct sk_buff *skb;
1776		struct rx_desc *rd;
1777		dma_addr_t mapping;
1778
1779		skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE);
1780		if (!skb)
1781			break;
1782
1783		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
1784				       offset_in_page(skb->data),
1785				       ACE_JUMBO_BUFSIZE,
1786				       PCI_DMA_FROMDEVICE);
1787		ap->skb->rx_jumbo_skbuff[idx].skb = skb;
1788		dma_unmap_addr_set(&ap->skb->rx_jumbo_skbuff[idx],
1789				   mapping, mapping);
1790
1791		rd = &ap->rx_jumbo_ring[idx];
1792		set_aceaddr(&rd->addr, mapping);
1793		rd->size = ACE_JUMBO_BUFSIZE;
1794		rd->idx = idx;
1795		idx = (idx + 1) % RX_JUMBO_RING_ENTRIES;
1796	}
1797
1798	if (!i)
1799		goto error_out;
1800
1801	atomic_add(i, &ap->cur_jumbo_bufs);
1802	ap->rx_jumbo_skbprd = idx;
1803
1804	if (ACE_IS_TIGON_I(ap)) {
1805		struct cmd cmd;
1806		cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
1807		cmd.code = 0;
1808		cmd.idx = ap->rx_jumbo_skbprd;
1809		ace_issue_cmd(regs, &cmd);
1810	} else {
1811		writel(idx, &regs->RxJumboPrd);
1812		wmb();
1813	}
1814
1815 out:
1816	clear_bit(0, &ap->jumbo_refill_busy);
1817	return;
1818 error_out:
1819	if (net_ratelimit())
1820		printk(KERN_INFO "Out of memory when allocating "
1821		       "jumbo receive buffers\n");
1822	goto out;
1823}
1824
1825
1826/*
1827 * All events are considered to be slow (RX/TX ints do not generate
1828 * events) and are handled here, outside the main interrupt handler,
1829 * to reduce the size of the handler.
1830 */
1831static u32 ace_handle_event(struct net_device *dev, u32 evtcsm, u32 evtprd)
1832{
1833	struct ace_private *ap;
1834
1835	ap = netdev_priv(dev);
1836
1837	while (evtcsm != evtprd) {
1838		switch (ap->evt_ring[evtcsm].evt) {
1839		case E_FW_RUNNING:
1840			printk(KERN_INFO "%s: Firmware up and running\n",
1841			       ap->name);
1842			ap->fw_running = 1;
1843			wmb();
1844			break;
1845		case E_STATS_UPDATED:
1846			break;
1847		case E_LNK_STATE:
1848		{
1849			u16 code = ap->evt_ring[evtcsm].code;
1850			switch (code) {
1851			case E_C_LINK_UP:
1852			{
1853				u32 state = readl(&ap->regs->GigLnkState);
1854				printk(KERN_WARNING "%s: Optical link UP "
1855				       "(%s Duplex, Flow Control: %s%s)\n",
1856				       ap->name,
1857				       state & LNK_FULL_DUPLEX ? "Full":"Half",
1858				       state & LNK_TX_FLOW_CTL_Y ? "TX " : "",
1859				       state & LNK_RX_FLOW_CTL_Y ? "RX" : "");
1860				break;
1861			}
1862			case E_C_LINK_DOWN:
1863				printk(KERN_WARNING "%s: Optical link DOWN\n",
1864				       ap->name);
1865				break;
1866			case E_C_LINK_10_100:
1867				printk(KERN_WARNING "%s: 10/100BaseT link "
1868				       "UP\n", ap->name);
1869				break;
1870			default:
1871				printk(KERN_ERR "%s: Unknown optical link "
1872				       "state %02x\n", ap->name, code);
1873			}
1874			break;
1875		}
1876		case E_ERROR:
1877			switch(ap->evt_ring[evtcsm].code) {
1878			case E_C_ERR_INVAL_CMD:
1879				printk(KERN_ERR "%s: invalid command error\n",
1880				       ap->name);
1881				break;
1882			case E_C_ERR_UNIMP_CMD:
1883				printk(KERN_ERR "%s: unimplemented command "
1884				       "error\n", ap->name);
1885				break;
1886			case E_C_ERR_BAD_CFG:
1887				printk(KERN_ERR "%s: bad config error\n",
1888				       ap->name);
1889				break;
1890			default:
1891				printk(KERN_ERR "%s: unknown error %02x\n",
1892				       ap->name, ap->evt_ring[evtcsm].code);
1893			}
1894			break;
1895		case E_RESET_JUMBO_RNG:
1896		{
1897			int i;
1898			for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) {
1899				if (ap->skb->rx_jumbo_skbuff[i].skb) {
1900					ap->rx_jumbo_ring[i].size = 0;
1901					set_aceaddr(&ap->rx_jumbo_ring[i].addr, 0);
1902					dev_kfree_skb(ap->skb->rx_jumbo_skbuff[i].skb);
1903					ap->skb->rx_jumbo_skbuff[i].skb = NULL;
1904				}
1905			}
1906
1907 			if (ACE_IS_TIGON_I(ap)) {
1908 				struct cmd cmd;
1909 				cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
1910 				cmd.code = 0;
1911 				cmd.idx = 0;
1912 				ace_issue_cmd(ap->regs, &cmd);
1913 			} else {
1914 				writel(0, &((ap->regs)->RxJumboPrd));
1915 				wmb();
1916 			}
1917
1918			ap->jumbo = 0;
1919			ap->rx_jumbo_skbprd = 0;
1920			printk(KERN_INFO "%s: Jumbo ring flushed\n",
1921			       ap->name);
1922			clear_bit(0, &ap->jumbo_refill_busy);
1923			break;
1924		}
1925		default:
1926			printk(KERN_ERR "%s: Unhandled event 0x%02x\n",
1927			       ap->name, ap->evt_ring[evtcsm].evt);
1928		}
1929		evtcsm = (evtcsm + 1) % EVT_RING_ENTRIES;
1930	}
1931
1932	return evtcsm;
1933}
1934
1935
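/*
 * Process received frames on the return ring between the consumer
 * index (rxretcsm) and the producer index (rxretprd), unmapping each
 * buffer and passing the skb up the stack.
 */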
1936static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
1937{
1938	struct ace_private *ap = netdev_priv(dev);
1939	u32 idx;
1940	int mini_count = 0, std_count = 0;
1941
1942	idx = rxretcsm;
1943
1944	prefetchw(&ap->cur_rx_bufs);
1945	prefetchw(&ap->cur_mini_bufs);
1946
1947	while (idx != rxretprd) {
1948		struct ring_info *rip;
1949		struct sk_buff *skb;
1950		struct rx_desc *rxdesc, *retdesc;
1951		u32 skbidx;
1952		int bd_flags, desc_type, mapsize;
1953		u16 csum;
1954
1955
1956		/* make sure the rx descriptor isn't read before rxretprd */
1957		if (idx == rxretcsm)
1958			rmb();
1959
1960		retdesc = &ap->rx_return_ring[idx];
1961		skbidx = retdesc->idx;
1962		bd_flags = retdesc->flags;
1963		desc_type = bd_flags & (BD_FLG_JUMBO | BD_FLG_MINI);
1964
1965		switch(desc_type) {
1966			/*
1967			 * Normal frames do not have any flags set
1968			 *
1969			 * Mini and normal frames arrive frequently,
1970			 * so use a local counter to avoid doing
1971			 * atomic operations for each packet arriving.
1972			 */
1973		case 0:
1974			rip = &ap->skb->rx_std_skbuff[skbidx];
1975			mapsize = ACE_STD_BUFSIZE;
1976			rxdesc = &ap->rx_std_ring[skbidx];
1977			std_count++;
1978			break;
1979		case BD_FLG_JUMBO:
1980			rip = &ap->skb->rx_jumbo_skbuff[skbidx];
1981			mapsize = ACE_JUMBO_BUFSIZE;
1982			rxdesc = &ap->rx_jumbo_ring[skbidx];
1983			atomic_dec(&ap->cur_jumbo_bufs);
1984			break;
1985		case BD_FLG_MINI:
1986			rip = &ap->skb->rx_mini_skbuff[skbidx];
1987			mapsize = ACE_MINI_BUFSIZE;
1988			rxdesc = &ap->rx_mini_ring[skbidx];
1989			mini_count++;
1990			break;
1991		default:
1992			printk(KERN_INFO "%s: unknown frame type (0x%02x) "
1993			       "returned by NIC\n", dev->name,
1994			       retdesc->flags);
1995			goto error;
1996		}
1997
1998		skb = rip->skb;
1999		rip->skb = NULL;
2000		pci_unmap_page(ap->pdev,
2001			       dma_unmap_addr(rip, mapping),
2002			       mapsize,
2003			       PCI_DMA_FROMDEVICE);
2004		skb_put(skb, retdesc->size);
2005
2006		/*
2007		 * Fly baby, fly!
2008		 */
2009		csum = retdesc->tcp_udp_csum;
2010
2011		skb->protocol = eth_type_trans(skb, dev);
2012
2013		/*
2014		 * Instead of forcing the poor tigon mips cpu to calculate
2015		 * pseudo hdr checksum, we do this ourselves.
2016		 */
2017		if (bd_flags & BD_FLG_TCP_UDP_SUM) {
2018			skb->csum = htons(csum);
2019			skb->ip_summed = CHECKSUM_COMPLETE;
2020		} else {
2021			skb_checksum_none_assert(skb);
2022		}
2023
2024		/* send it up */
2025		if ((bd_flags & BD_FLG_VLAN_TAG))
2026			__vlan_hwaccel_put_tag(skb, retdesc->vlan);
2027		netif_rx(skb);
2028
2029		dev->stats.rx_packets++;
2030		dev->stats.rx_bytes += retdesc->size;
2031
2032		idx = (idx + 1) % RX_RETURN_RING_ENTRIES;
2033	}
2034
2035	atomic_sub(std_count, &ap->cur_rx_bufs);
2036	if (!ACE_IS_TIGON_I(ap))
2037		atomic_sub(mini_count, &ap->cur_mini_bufs);
2038
2039 out:
2040	/*
2041	 * According to the documentation RxRetCsm is obsolete with
2042	 * the 12.3.x Firmware - my Tigon I NICs seem to disagree!
2043	 */
2044	if (ACE_IS_TIGON_I(ap)) {
2045		writel(idx, &ap->regs->RxRetCsm);
2046	}
2047	ap->cur_rx = idx;
2048
2049	return;
2050 error:
2051	idx = rxretprd;
2052	goto out;
2053}
2054
2055
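/*
 * Reclaim transmit descriptors up to the consumer index (txcsm)
 * reported by the NIC, unmapping the buffers, freeing the skbs and
 * waking the queue if it was stopped.
 */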
2056static inline void ace_tx_int(struct net_device *dev,
2057			      u32 txcsm, u32 idx)
2058{
2059	struct ace_private *ap = netdev_priv(dev);
2060
2061	do {
2062		struct sk_buff *skb;
2063		struct tx_ring_info *info;
2064
2065		info = ap->skb->tx_skbuff + idx;
2066		skb = info->skb;
2067
2068		if (dma_unmap_len(info, maplen)) {
2069			pci_unmap_page(ap->pdev, dma_unmap_addr(info, mapping),
2070				       dma_unmap_len(info, maplen),
2071				       PCI_DMA_TODEVICE);
2072			dma_unmap_len_set(info, maplen, 0);
2073		}
2074
2075		if (skb) {
2076			dev->stats.tx_packets++;
2077			dev->stats.tx_bytes += skb->len;
2078			dev_kfree_skb_irq(skb);
2079			info->skb = NULL;
2080		}
2081
2082		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2083	} while (idx != txcsm);
2084
2085	if (netif_queue_stopped(dev))
2086		netif_wake_queue(dev);
2087
2088	wmb();
2089	ap->tx_ret_csm = txcsm;
2090
2091	/* So... tx_ret_csm is advanced _after_ check for device wakeup.
2092	 *
2093	 * We could try to make it before. In this case we would get
2094	 * the following race condition: hard_start_xmit on other cpu
2095	 * enters after we advanced tx_ret_csm and fills space,
2096	 * which we have just freed, so that we make an illegal device wakeup.
2097	 * There is no good way to work around this (the check at entry
2098	 * to ace_start_xmit detects this condition and prevents
2099	 * ring corruption, but it is not a good workaround.)
2100	 *
2101	 * When tx_ret_csm is advanced after, we wake up device _only_
2102	 * if we really have some space in ring (though the core doing
2103	 * hard_start_xmit can see full ring for some period and has to
2104	 * synchronize.) Superb.
2105	 * BUT! We get another subtle race condition. hard_start_xmit
2106	 * may think that ring is full between wakeup and advancing
2107	 * tx_ret_csm and will stop device instantly! It is not so bad.
2108	 * We are guaranteed that there is something in ring, so that
2109	 * the next irq will resume transmission. To speed this up we could
2110	 * mark the descriptor which closes the ring with BD_FLG_COAL_NOW
2111	 * (see ace_start_xmit).
2112	 *
2113	 * Well, this dilemma exists in all lock-free devices.
2114	 * We, following scheme used in drivers by Donald Becker,
2115	 * select the least dangerous.
2116	 *							--ANK
2117	 */
2118}
2119
2120
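/*
 * Interrupt handler: ack the interrupt, process rx returns, tx
 * completions and slow events, then refill any rx ring that has
 * dropped below its panic threshold (or defer that to the tasklet).
 */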
2121static irqreturn_t ace_interrupt(int irq, void *dev_id)
2122{
2123	struct net_device *dev = (struct net_device *)dev_id;
2124	struct ace_private *ap = netdev_priv(dev);
2125	struct ace_regs __iomem *regs = ap->regs;
2126	u32 idx;
2127	u32 txcsm, rxretcsm, rxretprd;
2128	u32 evtcsm, evtprd;
2129
2130	/*
2131	 * In case of PCI shared interrupts or spurious interrupts,
2132	 * we want to make sure it is actually our interrupt before
2133	 * spending any time in here.
2134	 */
2135	if (!(readl(&regs->HostCtrl) & IN_INT))
2136		return IRQ_NONE;
2137
2138	/*
2139	 * ACK intr now. Otherwise we will lose updates to rx_ret_prd,
2140	 * which happened _after_ rxretprd = *ap->rx_ret_prd; but before
2141	 * writel(0, &regs->Mb0Lo).
2142	 *
2143	 * "IRQ avoidance" recommended in docs applies to IRQs served by
2144	 * threads, and it is wrong even for that case.
2145	 */
2146	writel(0, &regs->Mb0Lo);
2147	readl(&regs->Mb0Lo);
2148
2149	/*
2150	 * There is no conflict between transmit handling in
2151	 * start_xmit and receive processing, thus there is no reason
2152	 * to take a spin lock for RX handling. Wait until we start
2153	 * working on the other stuff - hey we don't need a spin lock
2154	 * anymore.
2155	 */
2156	rxretprd = *ap->rx_ret_prd;
2157	rxretcsm = ap->cur_rx;
2158
2159	if (rxretprd != rxretcsm)
2160		ace_rx_int(dev, rxretprd, rxretcsm);
2161
2162	txcsm = *ap->tx_csm;
2163	idx = ap->tx_ret_csm;
2164
2165	if (txcsm != idx) {
2166		/*
2167		 * If each skb takes only one descriptor this check degenerates
2168		 * to identity, because new space has just been opened.
2169		 * But if skbs are fragmented we must check that this index
2170		 * update releases enough space, otherwise we just
2171		 * wait for the device to complete more work.
2172		 */
2173		if (!tx_ring_full(ap, txcsm, ap->tx_prd))
2174			ace_tx_int(dev, txcsm, idx);
2175	}
2176
2177	evtcsm = readl(&regs->EvtCsm);
2178	evtprd = *ap->evt_prd;
2179
2180	if (evtcsm != evtprd) {
2181		evtcsm = ace_handle_event(dev, evtcsm, evtprd);
2182		writel(evtcsm, &regs->EvtCsm);
2183	}
2184
2185	/*
2186	 * This has to go last in the interrupt handler and run with
2187	 * the spin lock released ... what lock?
2188	 */
2189	if (netif_running(dev)) {
2190		int cur_size;
2191		int run_tasklet = 0;
2192
2193		cur_size = atomic_read(&ap->cur_rx_bufs);
2194		if (cur_size < RX_LOW_STD_THRES) {
2195			if ((cur_size < RX_PANIC_STD_THRES) &&
2196			    !test_and_set_bit(0, &ap->std_refill_busy)) {
2197#ifdef DEBUG
2198				printk("low on std buffers %i\n", cur_size);
2199#endif
2200				ace_load_std_rx_ring(dev,
2201						     RX_RING_SIZE - cur_size);
2202			} else
2203				run_tasklet = 1;
2204		}
2205
2206		if (!ACE_IS_TIGON_I(ap)) {
2207			cur_size = atomic_read(&ap->cur_mini_bufs);
2208			if (cur_size < RX_LOW_MINI_THRES) {
2209				if ((cur_size < RX_PANIC_MINI_THRES) &&
2210				    !test_and_set_bit(0,
2211						      &ap->mini_refill_busy)) {
2212#ifdef DEBUG
2213					printk("low on mini buffers %i\n",
2214					       cur_size);
2215#endif
2216					ace_load_mini_rx_ring(dev,
2217							      RX_MINI_SIZE - cur_size);
2218				} else
2219					run_tasklet = 1;
2220			}
2221		}
2222
2223		if (ap->jumbo) {
2224			cur_size = atomic_read(&ap->cur_jumbo_bufs);
2225			if (cur_size < RX_LOW_JUMBO_THRES) {
2226				if ((cur_size < RX_PANIC_JUMBO_THRES) &&
2227				    !test_and_set_bit(0,
2228						      &ap->jumbo_refill_busy)){
2229#ifdef DEBUG
2230					printk("low on jumbo buffers %i\n",
2231					       cur_size);
2232#endif
2233					ace_load_jumbo_rx_ring(dev,
2234							       RX_JUMBO_SIZE - cur_size);
2235				} else
2236					run_tasklet = 1;
2237			}
2238		}
2239		if (run_tasklet && !ap->tasklet_pending) {
2240			ap->tasklet_pending = 1;
2241			tasklet_schedule(&ap->ace_tasklet);
2242		}
2243	}
2244
2245	return IRQ_HANDLED;
2246}
2247
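/*
 * Bring the interface up: set the MTU register, clear the statistics,
 * tell the firmware the host stack is up, prime the jumbo ring if
 * needed and start the transmit queue.
 */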
2248static int ace_open(struct net_device *dev)
2249{
2250	struct ace_private *ap = netdev_priv(dev);
2251	struct ace_regs __iomem *regs = ap->regs;
2252	struct cmd cmd;
2253
2254	if (!(ap->fw_running)) {
2255		printk(KERN_WARNING "%s: Firmware not running!\n", dev->name);
2256		return -EBUSY;
2257	}
2258
2259	writel(dev->mtu + ETH_HLEN + 4, &regs->IfMtu);
2260
2261	cmd.evt = C_CLEAR_STATS;
2262	cmd.code = 0;
2263	cmd.idx = 0;
2264	ace_issue_cmd(regs, &cmd);
2265
2266	cmd.evt = C_HOST_STATE;
2267	cmd.code = C_C_STACK_UP;
2268	cmd.idx = 0;
2269	ace_issue_cmd(regs, &cmd);
2270
2271	if (ap->jumbo &&
2272	    !test_and_set_bit(0, &ap->jumbo_refill_busy))
2273		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
2274
2275	if (dev->flags & IFF_PROMISC) {
2276		cmd.evt = C_SET_PROMISC_MODE;
2277		cmd.code = C_C_PROMISC_ENABLE;
2278		cmd.idx = 0;
2279		ace_issue_cmd(regs, &cmd);
2280
2281		ap->promisc = 1;
2282	} else
2283		ap->promisc = 0;
2284	ap->mcast_all = 0;
2285
2286#if 0
2287	cmd.evt = C_LNK_NEGOTIATION;
2288	cmd.code = 0;
2289	cmd.idx = 0;
2290	ace_issue_cmd(regs, &cmd);
2291#endif
2292
2293	netif_start_queue(dev);
2294
2295	/*
2296	 * Setup the bottom half rx ring refill handler
2297	 */
2298	tasklet_init(&ap->ace_tasklet, ace_tasklet, (unsigned long)dev);
2299	return 0;
2300}
2301
2302
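/*
 * Take the interface down: stop the queue, drop promiscuous mode, tell
 * the firmware the stack is down, kill the refill tasklet and release
 * any transmit buffers still in flight (with the irq masked).
 */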
2303static int ace_close(struct net_device *dev)
2304{
2305	struct ace_private *ap = netdev_priv(dev);
2306	struct ace_regs __iomem *regs = ap->regs;
2307	struct cmd cmd;
2308	unsigned long flags;
2309	short i;
2310
2311	/*
2312	 * Without (or before) releasing the irq and stopping the hardware,
2313	 * this is absolute nonsense, by the way. It will be reset instantly
2314	 * by the first irq.
2315	 */
2316	netif_stop_queue(dev);
2317
2318
2319	if (ap->promisc) {
2320		cmd.evt = C_SET_PROMISC_MODE;
2321		cmd.code = C_C_PROMISC_DISABLE;
2322		cmd.idx = 0;
2323		ace_issue_cmd(regs, &cmd);
2324		ap->promisc = 0;
2325	}
2326
2327	cmd.evt = C_HOST_STATE;
2328	cmd.code = C_C_STACK_DOWN;
2329	cmd.idx = 0;
2330	ace_issue_cmd(regs, &cmd);
2331
2332	tasklet_kill(&ap->ace_tasklet);
2333
2334	/*
2335	 * Make sure one CPU is not processing packets while
2336	 * buffers are being released by another.
2337	 */
2338
2339	local_irq_save(flags);
2340	ace_mask_irq(dev);
2341
2342	for (i = 0; i < ACE_TX_RING_ENTRIES(ap); i++) {
2343		struct sk_buff *skb;
2344		struct tx_ring_info *info;
2345
2346		info = ap->skb->tx_skbuff + i;
2347		skb = info->skb;
2348
2349		if (dma_unmap_len(info, maplen)) {
2350			if (ACE_IS_TIGON_I(ap)) {
2351				/* NB: TIGON_1 is special, tx_ring is in io space */
2352				struct tx_desc __iomem *tx;
2353				tx = (__force struct tx_desc __iomem *) &ap->tx_ring[i];
2354				writel(0, &tx->addr.addrhi);
2355				writel(0, &tx->addr.addrlo);
2356				writel(0, &tx->flagsize);
2357			} else
2358				memset(ap->tx_ring + i, 0,
2359				       sizeof(struct tx_desc));
2360			pci_unmap_page(ap->pdev, dma_unmap_addr(info, mapping),
2361				       dma_unmap_len(info, maplen),
2362				       PCI_DMA_TODEVICE);
2363			dma_unmap_len_set(info, maplen, 0);
2364		}
2365		if (skb) {
2366			dev_kfree_skb(skb);
2367			info->skb = NULL;
2368		}
2369	}
2370
2371	if (ap->jumbo) {
2372		cmd.evt = C_RESET_JUMBO_RNG;
2373		cmd.code = 0;
2374		cmd.idx = 0;
2375		ace_issue_cmd(regs, &cmd);
2376	}
2377
2378	ace_unmask_irq(dev);
2379	local_irq_restore(flags);
2380
2381	return 0;
2382}
2383
2384
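/*
 * DMA-map the linear part of a transmit skb and record the mapping in
 * the tx ring info so it can be unmapped at completion time; 'tail' is
 * the skb that will be freed when this descriptor is reclaimed.
 */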
2385static inline dma_addr_t
2386ace_map_tx_skb(struct ace_private *ap, struct sk_buff *skb,
2387	       struct sk_buff *tail, u32 idx)
2388{
2389	dma_addr_t mapping;
2390	struct tx_ring_info *info;
2391
2392	mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
2393			       offset_in_page(skb->data),
2394			       skb->len, PCI_DMA_TODEVICE);
2395
2396	info = ap->skb->tx_skbuff + idx;
2397	info->skb = tail;
2398	dma_unmap_addr_set(info, mapping, mapping);
2399	dma_unmap_len_set(info, maplen, skb->len);
2400	return mapping;
2401}
2402
2403
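/*
 * Fill in a transmit descriptor. On Tigon I the tx ring lives in NIC
 * I/O space and must be written with writel(); on Tigon II it is in
 * host memory and can be written directly.
 */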
2404static inline void
2405ace_load_tx_bd(struct ace_private *ap, struct tx_desc *desc, u64 addr,
2406	       u32 flagsize, u32 vlan_tag)
2407{
2408#if !USE_TX_COAL_NOW
2409	flagsize &= ~BD_FLG_COAL_NOW;
2410#endif
2411
2412	if (ACE_IS_TIGON_I(ap)) {
2413		struct tx_desc __iomem *io = (__force struct tx_desc __iomem *) desc;
2414		writel(addr >> 32, &io->addr.addrhi);
2415		writel(addr & 0xffffffff, &io->addr.addrlo);
2416		writel(flagsize, &io->flagsize);
2417		writel(vlan_tag, &io->vlanres);
2418	} else {
2419		desc->addr.addrhi = addr >> 32;
2420		desc->addr.addrlo = addr;
2421		desc->flagsize = flagsize;
2422		desc->vlanres = vlan_tag;
2423	}
2424}
2425
2426
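/*
 * Transmit a frame. Linear skbs use a single descriptor; fragmented
 * skbs get one descriptor per fragment and only the last descriptor
 * holds the skb pointer. Runs lock-free; on a full ring we spin for up
 * to three seconds before reporting the queue as stuck.
 */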
2427static netdev_tx_t ace_start_xmit(struct sk_buff *skb,
2428				  struct net_device *dev)
2429{
2430	struct ace_private *ap = netdev_priv(dev);
2431	struct ace_regs __iomem *regs = ap->regs;
2432	struct tx_desc *desc;
2433	u32 idx, flagsize;
2434	unsigned long maxjiff = jiffies + 3*HZ;
2435
2436restart:
2437	idx = ap->tx_prd;
2438
2439	if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2440		goto overflow;
2441
2442	if (!skb_shinfo(skb)->nr_frags)	{
2443		dma_addr_t mapping;
2444		u32 vlan_tag = 0;
2445
2446		mapping = ace_map_tx_skb(ap, skb, skb, idx);
2447		flagsize = (skb->len << 16) | (BD_FLG_END);
2448		if (skb->ip_summed == CHECKSUM_PARTIAL)
2449			flagsize |= BD_FLG_TCP_UDP_SUM;
2450		if (vlan_tx_tag_present(skb)) {
2451			flagsize |= BD_FLG_VLAN_TAG;
2452			vlan_tag = vlan_tx_tag_get(skb);
2453		}
2454		desc = ap->tx_ring + idx;
2455		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2456
2457		/* Look at ace_tx_int for explanations. */
2458		if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2459			flagsize |= BD_FLG_COAL_NOW;
2460
2461		ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag);
2462	} else {
2463		dma_addr_t mapping;
2464		u32 vlan_tag = 0;
2465		int i, len = 0;
2466
2467		mapping = ace_map_tx_skb(ap, skb, NULL, idx);
2468		flagsize = (skb_headlen(skb) << 16);
2469		if (skb->ip_summed == CHECKSUM_PARTIAL)
2470			flagsize |= BD_FLG_TCP_UDP_SUM;
2471		if (vlan_tx_tag_present(skb)) {
2472			flagsize |= BD_FLG_VLAN_TAG;
2473			vlan_tag = vlan_tx_tag_get(skb);
2474		}
2475
2476		ace_load_tx_bd(ap, ap->tx_ring + idx, mapping, flagsize, vlan_tag);
2477
2478		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2479
2480		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2481			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2482			struct tx_ring_info *info;
2483
2484			len += frag->size;
2485			info = ap->skb->tx_skbuff + idx;
2486			desc = ap->tx_ring + idx;
2487
2488			mapping = pci_map_page(ap->pdev, frag->page,
2489					       frag->page_offset, frag->size,
2490					       PCI_DMA_TODEVICE);
2491
2492			flagsize = (frag->size << 16);
2493			if (skb->ip_summed == CHECKSUM_PARTIAL)
2494				flagsize |= BD_FLG_TCP_UDP_SUM;
2495			idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2496
2497			if (i == skb_shinfo(skb)->nr_frags - 1) {
2498				flagsize |= BD_FLG_END;
2499				if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2500					flagsize |= BD_FLG_COAL_NOW;
2501
2502				/*
2503				 * Only the last fragment frees
2504				 * the skb!
2505				 */
2506				info->skb = skb;
2507			} else {
2508				info->skb = NULL;
2509			}
2510			dma_unmap_addr_set(info, mapping, mapping);
2511			dma_unmap_len_set(info, maplen, frag->size);
2512			ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag);
2513		}
2514	}
2515
2516 	wmb();
2517 	ap->tx_prd = idx;
2518 	ace_set_txprd(regs, ap, idx);
2519
2520	if (flagsize & BD_FLG_COAL_NOW) {
2521		netif_stop_queue(dev);
2522
2523		/*
2524		 * A TX-descriptor producer (an IRQ) might have gotten in
2525		 * between, making the ring free again. Since xmit is
2526		 * serialized, this is the only situation we have to
2527		 * re-test.
2528		 */
2529		if (!tx_ring_full(ap, ap->tx_ret_csm, idx))
2530			netif_wake_queue(dev);
2531	}
2532
2533	return NETDEV_TX_OK;
2534
2535overflow:
2536	/*
2537	 * This race condition is unavoidable with lock-free drivers.
2538	 * We wake up the queue _before_ tx_prd is advanced, so we can
2539	 * enter hard_start_xmit too early, while the tx ring still looks closed.
2540	 * This happens ~1-4 times per 100000 packets, so we can afford
2541	 * to loop, syncing with the other CPU. Probably, we need an additional
2542	 * wmb() in ace_tx_int as well.
2543	 *
2544	 * Note that this race is relieved by reserving one more entry
2545	 * in the tx ring than is necessary (see original non-SG driver).
2546	 * However, with SG we need to reserve 2*MAX_SKB_FRAGS+1, which
2547	 * is already overkill.
2548	 *
2549	 * The alternative is to return busy without throttling the queue. In
2550	 * that case the loop just becomes longer with no more useful effect.
2551	 */
2552	if (time_before(jiffies, maxjiff)) {
2553		barrier();
2554		cpu_relax();
2555		goto restart;
2556	}
2557
2558	/* The ring is stuck full. */
2559	printk(KERN_WARNING "%s: Transmit ring stuck full\n", dev->name);
2560	return NETDEV_TX_BUSY;
2561}
2562
2563
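/*
 * Change the MTU. Going above ACE_STD_MTU enables jumbo frames and
 * loads the jumbo ring; going back down tells the firmware to flush it.
 */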
2564static int ace_change_mtu(struct net_device *dev, int new_mtu)
2565{
2566	struct ace_private *ap = netdev_priv(dev);
2567	struct ace_regs __iomem *regs = ap->regs;
2568
2569	if (new_mtu > ACE_JUMBO_MTU)
2570		return -EINVAL;
2571
2572	writel(new_mtu + ETH_HLEN + 4, &regs->IfMtu);
2573	dev->mtu = new_mtu;
2574
2575	if (new_mtu > ACE_STD_MTU) {
2576		if (!(ap->jumbo)) {
2577			printk(KERN_INFO "%s: Enabling Jumbo frame "
2578			       "support\n", dev->name);
2579			ap->jumbo = 1;
2580			if (!test_and_set_bit(0, &ap->jumbo_refill_busy))
2581				ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
2582			ace_set_rxtx_parms(dev, 1);
2583		}
2584	} else {
2585		while (test_and_set_bit(0, &ap->jumbo_refill_busy));
2586		ace_sync_irq(dev->irq);
2587		ace_set_rxtx_parms(dev, 0);
2588		if (ap->jumbo) {
2589			struct cmd cmd;
2590
2591			cmd.evt = C_RESET_JUMBO_RNG;
2592			cmd.code = 0;
2593			cmd.idx = 0;
2594			ace_issue_cmd(regs, &cmd);
2595		}
2596	}
2597
2598	return 0;
2599}
2600
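/*
 * ethtool get_settings: report speed, duplex and autonegotiation state
 * from the gigabit and fast link state registers.
 */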
2601static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
2602{
2603	struct ace_private *ap = netdev_priv(dev);
2604	struct ace_regs __iomem *regs = ap->regs;
2605	u32 link;
2606
2607	memset(ecmd, 0, sizeof(struct ethtool_cmd));
2608	ecmd->supported =
2609		(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
2610		 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
2611		 SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
2612		 SUPPORTED_Autoneg | SUPPORTED_FIBRE);
2613
2614	ecmd->port = PORT_FIBRE;
2615	ecmd->transceiver = XCVR_INTERNAL;
2616
2617	link = readl(&regs->GigLnkState);
2618	if (link & LNK_1000MB)
2619		ethtool_cmd_speed_set(ecmd, SPEED_1000);
2620	else {
2621		link = readl(&regs->FastLnkState);
2622		if (link & LNK_100MB)
2623			ethtool_cmd_speed_set(ecmd, SPEED_100);
2624		else if (link & LNK_10MB)
2625			ethtool_cmd_speed_set(ecmd, SPEED_10);
2626		else
2627			ethtool_cmd_speed_set(ecmd, 0);
2628	}
2629	if (link & LNK_FULL_DUPLEX)
2630		ecmd->duplex = DUPLEX_FULL;
2631	else
2632		ecmd->duplex = DUPLEX_HALF;
2633
2634	if (link & LNK_NEGOTIATE)
2635		ecmd->autoneg = AUTONEG_ENABLE;
2636	else
2637		ecmd->autoneg = AUTONEG_DISABLE;
2638
2639#if 0
2640	/*
2641	 * Current struct ethtool_cmd is insufficient
2642	 */
2643	ecmd->trace = readl(&regs->TuneTrace);
2644
2645	ecmd->txcoal = readl(&regs->TuneTxCoalTicks);
2646	ecmd->rxcoal = readl(&regs->TuneRxCoalTicks);
2647#endif
2648	ecmd->maxtxpkt = readl(&regs->TuneMaxTxDesc);
2649	ecmd->maxrxpkt = readl(&regs->TuneMaxRxDesc);
2650
2651	return 0;
2652}
2653
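/*
 * ethtool set_settings: build a new link configuration word and, if it
 * differs from the current one, write it to the tuning registers and
 * ask the firmware to renegotiate the link.
 */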
2654static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
2655{
2656	struct ace_private *ap = netdev_priv(dev);
2657	struct ace_regs __iomem *regs = ap->regs;
2658	u32 link, speed;
2659
2660	link = readl(&regs->GigLnkState);
2661	if (link & LNK_1000MB)
2662		speed = SPEED_1000;
2663	else {
2664		link = readl(&regs->FastLnkState);
2665		if (link & LNK_100MB)
2666			speed = SPEED_100;
2667		else if (link & LNK_10MB)
2668			speed = SPEED_10;
2669		else
2670			speed = SPEED_100;
2671	}
2672
2673	link = LNK_ENABLE | LNK_1000MB | LNK_100MB | LNK_10MB |
2674		LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL;
2675	if (!ACE_IS_TIGON_I(ap))
2676		link |= LNK_TX_FLOW_CTL_Y;
2677	if (ecmd->autoneg == AUTONEG_ENABLE)
2678		link |= LNK_NEGOTIATE;
2679	if (ethtool_cmd_speed(ecmd) != speed) {
2680		link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB);
2681		switch (ethtool_cmd_speed(ecmd)) {
2682		case SPEED_1000:
2683			link |= LNK_1000MB;
2684			break;
2685		case SPEED_100:
2686			link |= LNK_100MB;
2687			break;
2688		case SPEED_10:
2689			link |= LNK_10MB;
2690			break;
2691		}
2692	}
2693
2694	if (ecmd->duplex == DUPLEX_FULL)
2695		link |= LNK_FULL_DUPLEX;
2696
2697	if (link != ap->link) {
2698		struct cmd cmd;
2699		printk(KERN_INFO "%s: Renegotiating link state\n",
2700		       dev->name);
2701
2702		ap->link = link;
2703		writel(link, &regs->TuneLink);
2704		if (!ACE_IS_TIGON_I(ap))
2705			writel(link, &regs->TuneFastLink);
2706		wmb();
2707
2708		cmd.evt = C_LNK_NEGOTIATION;
2709		cmd.code = 0;
2710		cmd.idx = 0;
2711		ace_issue_cmd(regs, &cmd);
2712	}
2713	return 0;
2714}
2715
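/*
 * ethtool get_drvinfo: report the driver name, the running firmware
 * version and the PCI bus location.
 */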
2716static void ace_get_drvinfo(struct net_device *dev,
2717			    struct ethtool_drvinfo *info)
2718{
2719	struct ace_private *ap = netdev_priv(dev);
2720
2721	strlcpy(info->driver, "acenic", sizeof(info->driver));
2722	snprintf(info->version, sizeof(info->version), "%i.%i.%i",
2723		 ap->firmware_major, ap->firmware_minor,
2724		 ap->firmware_fix);
2725
2726	if (ap->pdev)
2727		strlcpy(info->bus_info, pci_name(ap->pdev),
2728			sizeof(info->bus_info));
2729
2730}
2731
2732/*
2733 * Set the hardware MAC address.
2734 */
2735static int ace_set_mac_addr(struct net_device *dev, void *p)
2736{
2737	struct ace_private *ap = netdev_priv(dev);
2738	struct ace_regs __iomem *regs = ap->regs;
2739	struct sockaddr *addr = p;
2740	u8 *da;
2741	struct cmd cmd;
2742
2743	if (netif_running(dev))
2744		return -EBUSY;
2745
2746	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2747
2748	da = (u8 *)dev->dev_addr;
2749
2750	writel(da[0] << 8 | da[1], &regs->MacAddrHi);
2751	writel((da[2] << 24) | (da[3] << 16) | (da[4] << 8) | da[5],
2752	       &regs->MacAddrLo);
2753
2754	cmd.evt = C_SET_MAC_ADDR;
2755	cmd.code = 0;
2756	cmd.idx = 0;
2757	ace_issue_cmd(regs, &cmd);
2758
2759	return 0;
2760}
2761
2762
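/*
 * Update the promiscuous and multicast modes in the firmware. Only the
 * all-multicast switch is programmed here; per-address filtering is
 * left to the upper layers (see the comment below).
 */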
2763static void ace_set_multicast_list(struct net_device *dev)
2764{
2765	struct ace_private *ap = netdev_priv(dev);
2766	struct ace_regs __iomem *regs = ap->regs;
2767	struct cmd cmd;
2768
2769	if ((dev->flags & IFF_ALLMULTI) && !(ap->mcast_all)) {
2770		cmd.evt = C_SET_MULTICAST_MODE;
2771		cmd.code = C_C_MCAST_ENABLE;
2772		cmd.idx = 0;
2773		ace_issue_cmd(regs, &cmd);
2774		ap->mcast_all = 1;
2775	} else if (ap->mcast_all) {
2776		cmd.evt = C_SET_MULTICAST_MODE;
2777		cmd.code = C_C_MCAST_DISABLE;
2778		cmd.idx = 0;
2779		ace_issue_cmd(regs, &cmd);
2780		ap->mcast_all = 0;
2781	}
2782
2783	if ((dev->flags & IFF_PROMISC) && !(ap->promisc)) {
2784		cmd.evt = C_SET_PROMISC_MODE;
2785		cmd.code = C_C_PROMISC_ENABLE;
2786		cmd.idx = 0;
2787		ace_issue_cmd(regs, &cmd);
2788		ap->promisc = 1;
2789	} else if (!(dev->flags & IFF_PROMISC) && (ap->promisc)) {
2790		cmd.evt = C_SET_PROMISC_MODE;
2791		cmd.code = C_C_PROMISC_DISABLE;
2792		cmd.idx = 0;
2793		ace_issue_cmd(regs, &cmd);
2794		ap->promisc = 0;
2795	}
2796
2797	/*
2798	 * For the time being multicast relies on the upper layers
2799	 * filtering it properly. The Firmware does not allow one to
2800	 * set the entire multicast list at a time and keeping track of
2801	 * it here is going to be messy.
2802	 */
2803	if (!netdev_mc_empty(dev) && !ap->mcast_all) {
2804		cmd.evt = C_SET_MULTICAST_MODE;
2805		cmd.code = C_C_MCAST_ENABLE;
2806		cmd.idx = 0;
2807		ace_issue_cmd(regs, &cmd);
2808	} else if (!ap->mcast_all) {
2809		cmd.evt = C_SET_MULTICAST_MODE;
2810		cmd.code = C_C_MCAST_DISABLE;
2811		cmd.idx = 0;
2812		ace_issue_cmd(regs, &cmd);
2813	}
2814}
2815
2816
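/*
 * Return the interface statistics, adding in the counters that only
 * the NIC's MAC maintains (missed frames, multicast, collisions).
 */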
2817static struct net_device_stats *ace_get_stats(struct net_device *dev)
2818{
2819	struct ace_private *ap = netdev_priv(dev);
2820	struct ace_mac_stats __iomem *mac_stats =
2821		(struct ace_mac_stats __iomem *)ap->regs->Stats;
2822
2823	dev->stats.rx_missed_errors = readl(&mac_stats->drop_space);
2824	dev->stats.multicast = readl(&mac_stats->kept_mc);
2825	dev->stats.collisions = readl(&mac_stats->coll);
2826
2827	return &dev->stats;
2828}
2829
2830
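/*
 * Copy a block of (big-endian) firmware data into NIC SRAM through the
 * shared memory window, moving the window base as needed.
 */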
2831static void __devinit ace_copy(struct ace_regs __iomem *regs, const __be32 *src,
2832			       u32 dest, int size)
2833{
2834	void __iomem *tdest;
2835	short tsize, i;
2836
2837	if (size <= 0)
2838		return;
2839
2840	while (size > 0) {
2841		tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
2842			    min_t(u32, size, ACE_WINDOW_SIZE));
2843		tdest = (void __iomem *) &regs->Window +
2844			(dest & (ACE_WINDOW_SIZE - 1));
2845		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
2846		for (i = 0; i < (tsize / 4); i++) {
2847			/* Firmware is big-endian */
2848			writel(be32_to_cpup(src), tdest);
2849			src++;
2850			tdest += 4;
2851			dest += 4;
2852			size -= 4;
2853		}
2854	}
2855}
2856
2857
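/*
 * Zero a region of NIC SRAM through the shared memory window.
 */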
2858static void __devinit ace_clear(struct ace_regs __iomem *regs, u32 dest, int size)
2859{
2860	void __iomem *tdest;
2861	short tsize = 0, i;
2862
2863	if (size <= 0)
2864		return;
2865
2866	while (size > 0) {
2867		tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
2868				min_t(u32, size, ACE_WINDOW_SIZE));
2869		tdest = (void __iomem *) &regs->Window +
2870			(dest & (ACE_WINDOW_SIZE - 1));
2871		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
2872
2873		for (i = 0; i < (tsize / 4); i++) {
2874			writel(0, tdest + i*4);
2875		}
2876
2877		dest += tsize;
2878		size -= tsize;
2879	}
2880}
2881
2882
2883/*
2884 * Download the firmware into the SRAM on the NIC
2885 *
2886 * This operation requires the NIC to be halted and is performed with
2887 * interrupts disabled and with the spinlock held.
2888 */
2889static int __devinit ace_load_firmware(struct net_device *dev)
2890{
2891	const struct firmware *fw;
2892	const char *fw_name = "acenic/tg2.bin";
2893	struct ace_private *ap = netdev_priv(dev);
2894	struct ace_regs __iomem *regs = ap->regs;
2895	const __be32 *fw_data;
2896	u32 load_addr;
2897	int ret;
2898
2899	if (!(readl(&regs->CpuCtrl) & CPU_HALTED)) {
2900		printk(KERN_ERR "%s: trying to download firmware while the "
2901		       "CPU is running!\n", ap->name);
2902		return -EFAULT;
2903	}
2904
2905	if (ACE_IS_TIGON_I(ap))
2906		fw_name = "acenic/tg1.bin";
2907
2908	ret = request_firmware(&fw, fw_name, &ap->pdev->dev);
2909	if (ret) {
2910		printk(KERN_ERR "%s: Failed to load firmware \"%s\"\n",
2911		       ap->name, fw_name);
2912		return ret;
2913	}
2914
2915	fw_data = (void *)fw->data;
2916
2917	/* Firmware blob starts with version numbers, followed by
2918	   load and start address. Remainder is the blob to be loaded
2919	   contiguously from load address. We don't bother to represent
2920	   the BSS/SBSS sections any more, since we were clearing the
2921	   whole thing anyway. */
2922	ap->firmware_major = fw->data[0];
2923	ap->firmware_minor = fw->data[1];
2924	ap->firmware_fix = fw->data[2];
2925
2926	ap->firmware_start = be32_to_cpu(fw_data[1]);
2927	if (ap->firmware_start < 0x4000 || ap->firmware_start >= 0x80000) {
2928		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
2929		       ap->name, ap->firmware_start, fw_name);
2930		ret = -EINVAL;
2931		goto out;
2932	}
2933
2934	load_addr = be32_to_cpu(fw_data[2]);
2935	if (load_addr < 0x4000 || load_addr >= 0x80000) {
2936		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
2937		       ap->name, load_addr, fw_name);
2938		ret = -EINVAL;
2939		goto out;
2940	}
2941
2942	/*
2943	 * Do not try to clear more than 512KiB or we end up seeing
2944	 * funny things on NICs with only 512KiB SRAM
2945	 */
2946	ace_clear(regs, 0x2000, 0x80000-0x2000);
2947	ace_copy(regs, &fw_data[3], load_addr, fw->size-12);
2948 out:
2949	release_firmware(fw);
2950	return ret;
2951}
2952
2953
2954/*
2955 * The eeprom on the AceNIC is an Atmel i2c EEPROM.
2956 *
2957 * Accessing the EEPROM is `interesting' to say the least - don't read
2958 * this code right after dinner.
2959 *
2960 * This is all about black magic and bit-banging the device .... I
2961 * wonder in what hospital they have put the guy who designed the i2c
2962 * specs.
2963 *
2964 * Oh yes, this is only the beginning!
2965 *
2966 * Thanks to Stevarino Webinski for helping track down the bugs in the
2967 * i2c readout code by beta testing all my hacks.
2968 */
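/* Generate a start condition on the bit-banged EEPROM i2c bus. */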
2969static void __devinit eeprom_start(struct ace_regs __iomem *regs)
2970{
2971	u32 local;
2972
2973	readl(&regs->LocalCtrl);
2974	udelay(ACE_SHORT_DELAY);
2975	local = readl(&regs->LocalCtrl);
2976	local |= EEPROM_DATA_OUT | EEPROM_WRITE_ENABLE;
2977	writel(local, &regs->LocalCtrl);
2978	readl(&regs->LocalCtrl);
2979	mb();
2980	udelay(ACE_SHORT_DELAY);
2981	local |= EEPROM_CLK_OUT;
2982	writel(local, &regs->LocalCtrl);
2983	readl(&regs->LocalCtrl);
2984	mb();
2985	udelay(ACE_SHORT_DELAY);
2986	local &= ~EEPROM_DATA_OUT;
2987	writel(local, &regs->LocalCtrl);
2988	readl(&regs->LocalCtrl);
2989	mb();
2990	udelay(ACE_SHORT_DELAY);
2991	local &= ~EEPROM_CLK_OUT;
2992	writel(local, &regs->LocalCtrl);
2993	readl(&regs->LocalCtrl);
2994	mb();
2995}
2996
2997
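/* Clock one command/address byte ('magic') out to the EEPROM, MSB first. */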
2998static void __devinit eeprom_prep(struct ace_regs __iomem *regs, u8 magic)
2999{
3000	short i;
3001	u32 local;
3002
3003	udelay(ACE_SHORT_DELAY);
3004	local = readl(&regs->LocalCtrl);
3005	local &= ~EEPROM_DATA_OUT;
3006	local |= EEPROM_WRITE_ENABLE;
3007	writel(local, &regs->LocalCtrl);
3008	readl(&regs->LocalCtrl);
3009	mb();
3010
3011	for (i = 0; i < 8; i++, magic <<= 1) {
3012		udelay(ACE_SHORT_DELAY);
3013		if (magic & 0x80)
3014			local |= EEPROM_DATA_OUT;
3015		else
3016			local &= ~EEPROM_DATA_OUT;
3017		writel(local, &regs->LocalCtrl);
3018		readl(&regs->LocalCtrl);
3019		mb();
3020
3021		udelay(ACE_SHORT_DELAY);
3022		local |= EEPROM_CLK_OUT;
3023		writel(local, &regs->LocalCtrl);
3024		readl(&regs->LocalCtrl);
3025		mb();
3026		udelay(ACE_SHORT_DELAY);
3027		local &= ~(EEPROM_CLK_OUT | EEPROM_DATA_OUT);
3028		writel(local, &regs->LocalCtrl);
3029		readl(&regs->LocalCtrl);
3030		mb();
3031	}
3032}
3033
3034
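/*
 * Sample the acknowledge bit; returns non-zero if the data line stayed
 * high, i.e. the EEPROM did not ack.
 */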
3035static int __devinit eeprom_check_ack(struct ace_regs __iomem *regs)
3036{
3037	int state;
3038	u32 local;
3039
3040	local = readl(&regs->LocalCtrl);
3041	local &= ~EEPROM_WRITE_ENABLE;
3042	writel(local, &regs->LocalCtrl);
3043	readl(&regs->LocalCtrl);
3044	mb();
3045	udelay(ACE_LONG_DELAY);
3046	local |= EEPROM_CLK_OUT;
3047	writel(local, &regs->LocalCtrl);
3048	readl(&regs->LocalCtrl);
3049	mb();
3050	udelay(ACE_SHORT_DELAY);
3051	/* sample data in middle of high clk */
3052	state = (readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0;
3053	udelay(ACE_SHORT_DELAY);
3054	mb();
3055	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
3056	readl(&regs->LocalCtrl);
3057	mb();
3058
3059	return state;
3060}
3061
3062
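/* Generate a stop condition on the EEPROM i2c bus. */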
3063static void __devinit eeprom_stop(struct ace_regs __iomem *regs)
3064{
3065	u32 local;
3066
3067	udelay(ACE_SHORT_DELAY);
3068	local = readl(&regs->LocalCtrl);
3069	local |= EEPROM_WRITE_ENABLE;
3070	writel(local, &regs->LocalCtrl);
3071	readl(&regs->LocalCtrl);
3072	mb();
3073	udelay(ACE_SHORT_DELAY);
3074	local &= ~EEPROM_DATA_OUT;
3075	writel(local, &regs->LocalCtrl);
3076	readl(&regs->LocalCtrl);
3077	mb();
3078	udelay(ACE_SHORT_DELAY);
3079	local |= EEPROM_CLK_OUT;
3080	writel(local, &regs->LocalCtrl);
3081	readl(&regs->LocalCtrl);
3082	mb();
3083	udelay(ACE_SHORT_DELAY);
3084	local |= EEPROM_DATA_OUT;
3085	writel(local, &regs->LocalCtrl);
3086	readl(&regs->LocalCtrl);
3087	mb();
3088	udelay(ACE_LONG_DELAY);
3089	local &= ~EEPROM_CLK_OUT;
3090	writel(local, &regs->LocalCtrl);
3091	mb();
3092}
3093
3094
3095/*
3096 * Read a whole byte from the EEPROM.
3097 */
3098static int __devinit read_eeprom_byte(struct net_device *dev,
3099				   unsigned long offset)
3100{
3101	struct ace_private *ap = netdev_priv(dev);
3102	struct ace_regs __iomem *regs = ap->regs;
3103	unsigned long flags;
3104	u32 local;
3105	int result = 0;
3106	short i;
3107
3108	/*
3109	 * Don't take interrupts on this CPU while bit banging
3110	 * the %#%#@$ I2C device
3111	 */
3112	local_irq_save(flags);
3113
3114	eeprom_start(regs);
3115
3116	eeprom_prep(regs, EEPROM_WRITE_SELECT);
3117	if (eeprom_check_ack(regs)) {
3118		local_irq_restore(flags);
3119		printk(KERN_ERR "%s: Unable to sync eeprom\n", ap->name);
3120		result = -EIO;
3121		goto eeprom_read_error;
3122	}
3123
3124	eeprom_prep(regs, (offset >> 8) & 0xff);
3125	if (eeprom_check_ack(regs)) {
3126		local_irq_restore(flags);
3127		printk(KERN_ERR "%s: Unable to set address byte 0\n",
3128		       ap->name);
3129		result = -EIO;
3130		goto eeprom_read_error;
3131	}
3132
3133	eeprom_prep(regs, offset & 0xff);
3134	if (eeprom_check_ack(regs)) {
3135		local_irq_restore(flags);
3136		printk(KERN_ERR "%s: Unable to set address byte 1\n",
3137		       ap->name);
3138		result = -EIO;
3139		goto eeprom_read_error;
3140	}
3141
3142	eeprom_start(regs);
3143	eeprom_prep(regs, EEPROM_READ_SELECT);
3144	if (eeprom_check_ack(regs)) {
3145		local_irq_restore(flags);
3146		printk(KERN_ERR "%s: Unable to set READ_SELECT\n",
3147		       ap->name);
3148		result = -EIO;
3149		goto eeprom_read_error;
3150	}
3151
3152	for (i = 0; i < 8; i++) {
3153		local = readl(&regs->LocalCtrl);
3154		local &= ~EEPROM_WRITE_ENABLE;
3155		writel(local, &regs->LocalCtrl);
3156		readl(&regs->LocalCtrl);
3157		udelay(ACE_LONG_DELAY);
3158		mb();
3159		local |= EEPROM_CLK_OUT;
3160		writel(local, &regs->LocalCtrl);
3161		readl(&regs->LocalCtrl);
3162		mb();
3163		udelay(ACE_SHORT_DELAY);
3164		/* sample data mid high clk */
3165		result = (result << 1) |
3166			((readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0);
3167		udelay(ACE_SHORT_DELAY);
3168		mb();
3169		local = readl(&regs->LocalCtrl);
3170		local &= ~EEPROM_CLK_OUT;
3171		writel(local, &regs->LocalCtrl);
3172		readl(&regs->LocalCtrl);
3173		udelay(ACE_SHORT_DELAY);
3174		mb();
3175		if (i == 7) {
3176			local |= EEPROM_WRITE_ENABLE;
3177			writel(local, &regs->LocalCtrl);
3178			readl(&regs->LocalCtrl);
3179			mb();
3180			udelay(ACE_SHORT_DELAY);
3181		}
3182	}
3183
3184	local |= EEPROM_DATA_OUT;
3185	writel(local, &regs->LocalCtrl);
3186	readl(&regs->LocalCtrl);
3187	mb();
3188	udelay(ACE_SHORT_DELAY);
3189	writel(readl(&regs->LocalCtrl) | EEPROM_CLK_OUT, &regs->LocalCtrl);
3190	readl(&regs->LocalCtrl);
3191	udelay(ACE_LONG_DELAY);
3192	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
3193	readl(&regs->LocalCtrl);
3194	mb();
3195	udelay(ACE_SHORT_DELAY);
3196	eeprom_stop(regs);
3197
3198	local_irq_restore(flags);
3199 out:
3200	return result;
3201
3202 eeprom_read_error:
3203	printk(KERN_ERR "%s: Unable to read eeprom byte 0x%02lx\n",
3204	       ap->name, offset);
3205	goto out;
3206}