   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * acenic.c: Linux driver for the Alteon AceNIC Gigabit Ethernet card
   4 *           and other Tigon based cards.
   5 *
   6 * Copyright 1998-2002 by Jes Sorensen, <jes@trained-monkey.org>.
   7 *
   8 * Thanks to Alteon and 3Com for providing hardware and documentation
   9 * enabling me to write this driver.
  10 *
  11 * A mailing list for discussing the use of this driver has been
  12 * set up; please subscribe to the list if you have any questions
  13 * about the driver. Send mail to linux-acenic-help@sunsite.auc.dk to
  14 * see how to subscribe.
  15 *
  16 * Additional credits:
  17 *   Pete Wyckoff <wyckoff@ca.sandia.gov>: Initial Linux/Alpha and trace
  18 *       dump support. The trace dump support has not been
  19 *       integrated yet however.
  20 *   Troy Benjegerdes: Big Endian (PPC) patches.
  21 *   Nate Stahl: Better out of memory handling and stats support.
  22 *   Aman Singla: Nasty race between interrupt handler and tx code dealing
  23 *                with 'testing the tx_ret_csm and setting tx_full'
  24 *   David S. Miller <davem@redhat.com>: conversion to new PCI dma mapping
  25 *                                       infrastructure and Sparc support
  26 *   Pierrick Pinasseau (CERN): For lending me an Ultra 5 to test the
  27 *                              driver under Linux/Sparc64
  28 *   Matt Domsch <Matt_Domsch@dell.com>: Detect Alteon 1000baseT cards
  29 *                                       ETHTOOL_GDRVINFO support
  30 *   Chip Salzenberg <chip@valinux.com>: Fix race condition between tx
  31 *                                       handler and close() cleanup.
  32 *   Ken Aaker <kdaaker@rchland.vnet.ibm.com>: Correct check for whether
  33 *                                       memory mapped IO is enabled to
  34 *                                       make the driver work on RS/6000.
  35 *   Takayoshi Kouchi <kouchi@hpc.bs1.fc.nec.co.jp>: Identifying problem
  36 *                                       where the driver would disable
  37 *                                       bus master mode if it had to disable
  38 *                                       write and invalidate.
  39 *   Stephen Hack <stephen_hack@hp.com>: Fixed ace_set_mac_addr for little
  40 *                                       endian systems.
  41 *   Val Henson <vhenson@esscom.com>:    Reset Jumbo skb producer and
  42 *                                       rx producer index when
  43 *                                       flushing the Jumbo ring.
  44 *   Hans Grobler <grobh@sun.ac.za>:     Memory leak fixes in the
  45 *                                       driver init path.
  46 *   Grant Grundler <grundler@cup.hp.com>: PCI write posting fixes.
  47 */
  48
  49#include <linux/module.h>
  50#include <linux/moduleparam.h>
  51#include <linux/types.h>
  52#include <linux/errno.h>
  53#include <linux/ioport.h>
  54#include <linux/pci.h>
  55#include <linux/dma-mapping.h>
  56#include <linux/kernel.h>
  57#include <linux/netdevice.h>
  58#include <linux/etherdevice.h>
  59#include <linux/skbuff.h>
  60#include <linux/delay.h>
  61#include <linux/mm.h>
  62#include <linux/highmem.h>
  63#include <linux/sockios.h>
  64#include <linux/firmware.h>
  65#include <linux/slab.h>
  66#include <linux/prefetch.h>
  67#include <linux/if_vlan.h>
  68
  69#ifdef SIOCETHTOOL
  70#include <linux/ethtool.h>
  71#endif
  72
  73#include <net/sock.h>
  74#include <net/ip.h>
  75
  76#include <asm/io.h>
  77#include <asm/irq.h>
  78#include <asm/byteorder.h>
  79#include <linux/uaccess.h>
  80
  81
  82#define DRV_NAME "acenic"
  83
  84#undef INDEX_DEBUG
  85
  86#ifdef CONFIG_ACENIC_OMIT_TIGON_I
  87#define ACE_IS_TIGON_I(ap)	0
  88#define ACE_TX_RING_ENTRIES(ap)	MAX_TX_RING_ENTRIES
  89#else
  90#define ACE_IS_TIGON_I(ap)	(ap->version == 1)
  91#define ACE_TX_RING_ENTRIES(ap)	ap->tx_ring_entries
  92#endif
  93
  94#ifndef PCI_VENDOR_ID_ALTEON
  95#define PCI_VENDOR_ID_ALTEON		0x12ae
  96#endif
  97#ifndef PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE
  98#define PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE  0x0001
  99#define PCI_DEVICE_ID_ALTEON_ACENIC_COPPER 0x0002
 100#endif
 101#ifndef PCI_DEVICE_ID_3COM_3C985
 102#define PCI_DEVICE_ID_3COM_3C985	0x0001
 103#endif
 104#ifndef PCI_VENDOR_ID_NETGEAR
 105#define PCI_VENDOR_ID_NETGEAR		0x1385
 106#define PCI_DEVICE_ID_NETGEAR_GA620	0x620a
 107#endif
 108#ifndef PCI_DEVICE_ID_NETGEAR_GA620T
 109#define PCI_DEVICE_ID_NETGEAR_GA620T	0x630a
 110#endif
 111
 112
 113/*
 114 * Farallon used the DEC vendor ID by mistake and they seem not
 115 * to care - stinky!
 116 */
 117#ifndef PCI_DEVICE_ID_FARALLON_PN9000SX
 118#define PCI_DEVICE_ID_FARALLON_PN9000SX	0x1a
 119#endif
 120#ifndef PCI_DEVICE_ID_FARALLON_PN9100T
 121#define PCI_DEVICE_ID_FARALLON_PN9100T  0xfa
 122#endif
 123#ifndef PCI_VENDOR_ID_SGI
 124#define PCI_VENDOR_ID_SGI		0x10a9
 125#endif
 126#ifndef PCI_DEVICE_ID_SGI_ACENIC
 127#define PCI_DEVICE_ID_SGI_ACENIC	0x0009
 128#endif
 129
 130static const struct pci_device_id acenic_pci_tbl[] = {
 131	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE,
 132	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 133	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_COPPER,
 134	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 135	{ PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C985,
 136	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 137	{ PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620,
 138	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 139	{ PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620T,
 140	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 141	/*
 142	 * Farallon used the DEC vendor ID on their cards incorrectly,
 143	 * then later Alteon's ID.
 144	 */
 145	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_FARALLON_PN9000SX,
 146	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 147	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_FARALLON_PN9100T,
 148	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 149	{ PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_ACENIC,
 150	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 151	{ }
 152};
 153MODULE_DEVICE_TABLE(pci, acenic_pci_tbl);
 154
 155#define ace_sync_irq(irq)	synchronize_irq(irq)
 156
 157#ifndef offset_in_page
 158#define offset_in_page(ptr)	((unsigned long)(ptr) & ~PAGE_MASK)
 159#endif
 160
 161#define ACE_MAX_MOD_PARMS	8
 162#define BOARD_IDX_STATIC	0
 163#define BOARD_IDX_OVERFLOW	-1
 164
 165#include "acenic.h"
 166
 167/*
 168 * These must be defined before the firmware is included.
 169 */
 170#define MAX_TEXT_LEN	96*1024
 171#define MAX_RODATA_LEN	8*1024
 172#define MAX_DATA_LEN	2*1024
 173
 174#ifndef tigon2FwReleaseLocal
 175#define tigon2FwReleaseLocal 0
 176#endif
 177
 178/*
 179 * This driver currently supports Tigon I and Tigon II based cards
 180 * including the Alteon AceNIC, the 3Com 3C985[B] and NetGear
 181 * GA620. The driver should also work on the SGI, DEC and Farallon
 182 * versions of the card, however I have not been able to test that
 183 * myself.
 184 *
 185 * This card is really neat, it supports receive hardware checksumming
 186 * and jumbo frames (up to 9000 bytes) and does a lot of work in the
 187 * firmware. Also the programming interface is quite neat, except for
 188 * the parts dealing with the i2c eeprom on the card ;-)
 189 *
 190 * Using jumbo frames:
 191 *
 192 * To enable jumbo frames, simply specify an mtu between 1500 and 9000
 193 * bytes to ifconfig. Jumbo frames can be enabled or disabled at any time
 194 * by running `ifconfig eth<X> mtu <MTU>' with <X> being the Ethernet
 195 * interface number and <MTU> being the MTU value.
 196 *
 197 * Module parameters:
 198 *
 199 * When compiled as a loadable module, the driver allows for a number
 200 * of module parameters to be specified. The driver supports the
 201 * following module parameters:
 202 *
 203 *  trace=<val> - Firmware trace level. This requires special traced
 204 *                firmware to replace the firmware supplied with
 205 *                the driver - for debugging purposes only.
 206 *
 207 *  link=<val>  - Link state. Normally you want to use the default link
 208 *                parameters set by the driver. This can be used to
 209 *                override these in case your switch doesn't negotiate
 210 *                the link properly. Valid values are:
 211 *         0x0001 - Force half duplex link.
 212 *         0x0002 - Do not negotiate line speed with the other end.
 213 *         0x0010 - 10Mbit/sec link.
 214 *         0x0020 - 100Mbit/sec link.
 215 *         0x0040 - 1000Mbit/sec link.
 216 *         0x0100 - Do not negotiate flow control.
 217 *         0x0200 - Enable RX flow control Y
 218 *         0x0400 - Enable TX flow control Y (Tigon II NICs only).
 219 *                Default value is 0x0270, ie. enable link+flow
 220 *                control negotiation. Negotiating the highest
 221 *                possible link speed with RX flow control enabled.
 222 *
 223 *                When disabling link speed negotiation, only one link
 224 *                speed is allowed to be specified!
 225 *
 226 *  tx_coal_tick=<val> - number of coalescing clock ticks (us) allowed
 227 *                to wait for more packets to arrive before
 228 *                interrupting the host, from the time the first
 229 *                packet arrives.
 230 *
 231 *  rx_coal_tick=<val> - number of coalescing clock ticks (us) allowed
 232 *                to wait for more packets to arrive in the receive ring,
 233 *                before interrupting the host, after receiving the
 234 *                first packet in the ring.
 235 *
 236 *  max_tx_desc=<val> - maximum number of transmit descriptors
 237 *                (packets) transmitted before interrupting the host.
 238 *
 239 *  max_rx_desc=<val> - maximum number of receive descriptors
 240 *                (packets) received before interrupting the host.
 241 *
 242 *  tx_ratio=<val> - 6 bit value (0 - 63) specifying the split in 64th
 243 *                increments of the NIC's on-board memory to be used for
 244 *                transmit and receive buffers. On the 1MB NIC approx. 800KB
 245 *                is available, on the 1/2MB NIC approx. 300KB is available.
 246 *                68KB will always be available as a minimum for both
 247 *                directions. The default value is a 50/50 split.
 248 *  dis_pci_mem_inval=<val> - disable PCI memory write and invalidate
 249 *                operations, default (1) is to always disable this as
 250 *                that is what Alteon does on NT. I have not been able
 251 *                to measure any real performance differences with
 252 *                this on my systems. Set <val>=0 if you want to
 253 *                enable these operations.
 254 *
 255 * If you use more than one NIC, separate the parameters for the
 256 * individual NICs with a comma, e.g. trace=0,0x00001fff,0 if you want to
 257 * run tracing on NIC #2 but not on NIC #1 and #3.
 258 *
 259 * TODO:
 260 *
 261 * - Proper multicast support.
 262 * - NIC dump support.
 263 * - More tuning parameters.
 264 *
 265 * The mini ring is not used under Linux and I am not sure it makes sense
 266 * to actually use it.
 267 *
 268 * New interrupt handler strategy:
 269 *
 270 * The old interrupt handler worked using the traditional method of
 271 * replacing an skbuff with a new one when a packet arrives. However
 272 * the rx rings do not need to contain a static number of buffer
 273 * descriptors, thus it makes sense to move the memory allocation out
 274 * of the main interrupt handler and do it in a bottom half handler
 275 * and only allocate new buffers when the number of buffers in the
 276 * ring is below a certain threshold. In order to avoid starving the
 277 * NIC under heavy load it is however necessary to force allocation
 278 * when hitting a minimum threshold. The strategy for allocation is as
 279 * follows:
 280 *
 281 *     RX_LOW_BUF_THRES    - allocate buffers in the bottom half
 282 *     RX_PANIC_LOW_THRES  - we are very low on buffers, allocate
 283 *                           the buffers in the interrupt handler
 284 *     RX_RING_THRES       - maximum number of buffers in the rx ring
 285 *     RX_MINI_THRES       - maximum number of buffers in the mini ring
 286 *     RX_JUMBO_THRES      - maximum number of buffers in the jumbo ring
 287 *
 288 * One advantageous side effect of this allocation approach is that the
 289 * entire rx processing can be done without holding any spin lock
 290 * since the rx rings and registers are totally independent of the tx
 291 * ring and its registers.  This of course includes the kmalloc's of
 292 * new skb's. Thus start_xmit can run in parallel with rx processing
 293 * and the memory allocation on SMP systems.
 294 *
 295 * Note that running the skb reallocation in a bottom half opens up
 296 * another can of races which needs to be handled properly. In
 297 * particular it can happen that the interrupt handler tries to run
 298 * the reallocation while the bottom half is either running on another
 299 * CPU or was interrupted on the same CPU. To get around this the
 300 * driver uses bitops to prevent the reallocation routines from being
 301 * reentered.
 302 *
 303 * TX handling can also be done without holding any spin lock, wheee
 304 * this is fun! since tx_ret_csm is only written to by the interrupt
 305 * handler. The case to be aware of is when shutting down the device
 306 * and cleaning up where it is necessary to make sure that
 307 * start_xmit() is not running while this is happening. Well DaveM
 308 * informs me that this case is already protected against ... bye bye
 309 * Mr. Spin Lock, it was nice to know you.
 310 *
 311 * TX interrupts are now partly disabled so the NIC will only generate
 312 * TX interrupts for the number of coal ticks, not for the number of
 313 * TX packets in the queue. This should reduce the number of TX only,
 314 * ie. when no RX processing is done, interrupts seen.
 315 */
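/*
 * Editor's note: the following usage sketch is illustrative only and is not
 * part of the original driver documentation. It simply restates the jumbo
 * frame and module parameter descriptions above; the interface name and the
 * parameter values are assumptions, not recommendations.
 *
 *   # Force 1000Mbit/sec with link negotiation disabled on NIC #2
 *   # (0x0040 | 0x0002 = 0x0042) while NIC #1 keeps the driver defaults:
 *   modprobe acenic link=0,0x0042
 *
 *   # Enable jumbo frames on the first AceNIC interface:
 *   ifconfig eth0 mtu 9000
 */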
 316
 317/*
 318 * Threshold values for RX buffer allocation - the low water marks for
 319 * when to start refilling the rings are set to 75% of the ring
 320 * sizes. It seems to make sense to refill the rings entirely from the
 321 * interrupt handler once it gets below the panic threshold, that way
 322 * we don't risk that the refilling is moved to another CPU when the
 323 * one running the interrupt handler just got the slab code hot in its
 324 * cache.
 325 */
 326#define RX_RING_SIZE		72
 327#define RX_MINI_SIZE		64
 328#define RX_JUMBO_SIZE		48
 329
 330#define RX_PANIC_STD_THRES	16
 331#define RX_PANIC_STD_REFILL	(3*RX_PANIC_STD_THRES)/2
 332#define RX_LOW_STD_THRES	(3*RX_RING_SIZE)/4
 333#define RX_PANIC_MINI_THRES	12
 334#define RX_PANIC_MINI_REFILL	(3*RX_PANIC_MINI_THRES)/2
 335#define RX_LOW_MINI_THRES	(3*RX_MINI_SIZE)/4
 336#define RX_PANIC_JUMBO_THRES	6
 337#define RX_PANIC_JUMBO_REFILL	(3*RX_PANIC_JUMBO_THRES)/2
 338#define RX_LOW_JUMBO_THRES	(3*RX_JUMBO_SIZE)/4
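/*
 * Editor's note: for reference only - with the ring load sizes defined above,
 * the low-water and refill macros work out to (integer arithmetic):
 *
 *   RX_LOW_STD_THRES    = (3*72)/4 = 54    RX_PANIC_STD_REFILL   = (3*16)/2 = 24
 *   RX_LOW_MINI_THRES   = (3*64)/4 = 48    RX_PANIC_MINI_REFILL  = (3*12)/2 = 18
 *   RX_LOW_JUMBO_THRES  = (3*48)/4 = 36    RX_PANIC_JUMBO_REFILL = (3*6)/2  = 9
 */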
 339
 340
 341/*
 342 * Size of the mini ring entries, basically these just should be big
 343 * enough to take TCP ACKs
 344 */
 345#define ACE_MINI_SIZE		100
 346
 347#define ACE_MINI_BUFSIZE	ACE_MINI_SIZE
 348#define ACE_STD_BUFSIZE		(ACE_STD_MTU + ETH_HLEN + 4)
 349#define ACE_JUMBO_BUFSIZE	(ACE_JUMBO_MTU + ETH_HLEN + 4)
 350
 351/*
 352 * There seems to be a magic difference in the effect between 995 and 996
 353 * but little difference between 900 and 995 ... no idea why.
 354 *
 355 * There is now a default set of tuning parameters which is set, depending
 356 * on whether or not the user enables Jumbo frames. It's assumed that if
 357 * Jumbo frames are enabled, the user wants optimal tuning for that case.
 358 */
 359#define DEF_TX_COAL		400 /* 996 */
 360#define DEF_TX_MAX_DESC		60  /* was 40 */
 361#define DEF_RX_COAL		120 /* 1000 */
 362#define DEF_RX_MAX_DESC		25
 363#define DEF_TX_RATIO		21 /* 24 */
 364
 365#define DEF_JUMBO_TX_COAL	20
 366#define DEF_JUMBO_TX_MAX_DESC	60
 367#define DEF_JUMBO_RX_COAL	30
 368#define DEF_JUMBO_RX_MAX_DESC	6
 369#define DEF_JUMBO_TX_RATIO	21
 370
 371#if tigon2FwReleaseLocal < 20001118
 372/*
 373 * Standard firmware and early modifications duplicate
 374 * IRQ load without this flag (coal timer is never reset).
 375 * Note that with this flag tx_coal should be less than
 376 * time to xmit full tx ring.
 377 * 400usec is not so bad for tx ring size of 128.
 378 */
 379#define TX_COAL_INTS_ONLY	1	/* worth it */
 380#else
 381/*
 382 * With modified firmware, this is not necessary, but still useful.
 383 */
 384#define TX_COAL_INTS_ONLY	1
 385#endif
 386
 387#define DEF_TRACE		0
 388#define DEF_STAT		(2 * TICKS_PER_SEC)
 389
 390
 391static int link_state[ACE_MAX_MOD_PARMS];
 392static int trace[ACE_MAX_MOD_PARMS];
 393static int tx_coal_tick[ACE_MAX_MOD_PARMS];
 394static int rx_coal_tick[ACE_MAX_MOD_PARMS];
 395static int max_tx_desc[ACE_MAX_MOD_PARMS];
 396static int max_rx_desc[ACE_MAX_MOD_PARMS];
 397static int tx_ratio[ACE_MAX_MOD_PARMS];
 398static int dis_pci_mem_inval[ACE_MAX_MOD_PARMS] = {1, 1, 1, 1, 1, 1, 1, 1};
 399
 400MODULE_AUTHOR("Jes Sorensen <jes@trained-monkey.org>");
 401MODULE_LICENSE("GPL");
 402MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver");
 403#ifndef CONFIG_ACENIC_OMIT_TIGON_I
 404MODULE_FIRMWARE("acenic/tg1.bin");
 405#endif
 406MODULE_FIRMWARE("acenic/tg2.bin");
 407
 408module_param_array_named(link, link_state, int, NULL, 0);
 409module_param_array(trace, int, NULL, 0);
 410module_param_array(tx_coal_tick, int, NULL, 0);
 411module_param_array(max_tx_desc, int, NULL, 0);
 412module_param_array(rx_coal_tick, int, NULL, 0);
 413module_param_array(max_rx_desc, int, NULL, 0);
 414module_param_array(tx_ratio, int, NULL, 0);
 415MODULE_PARM_DESC(link, "AceNIC/3C985/NetGear link state");
 416MODULE_PARM_DESC(trace, "AceNIC/3C985/NetGear firmware trace level");
 417MODULE_PARM_DESC(tx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first tx descriptor arrives");
 418MODULE_PARM_DESC(max_tx_desc, "AceNIC/3C985/GA620 max number of transmit descriptors to wait");
 419MODULE_PARM_DESC(rx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first rx descriptor arrives");
 420MODULE_PARM_DESC(max_rx_desc, "AceNIC/3C985/GA620 max number of receive descriptors to wait");
 421MODULE_PARM_DESC(tx_ratio, "AceNIC/3C985/GA620 ratio of NIC memory used for TX/RX descriptors (range 0-63)");
 422
 423
 424static const char version[] =
 425  "acenic.c: v0.92 08/05/2002  Jes Sorensen, linux-acenic@SunSITE.dk\n"
 426  "                            http://home.cern.ch/~jes/gige/acenic.html\n";
 427
 428static int ace_get_link_ksettings(struct net_device *,
 429				  struct ethtool_link_ksettings *);
 430static int ace_set_link_ksettings(struct net_device *,
 431				  const struct ethtool_link_ksettings *);
 432static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 433
 434static const struct ethtool_ops ace_ethtool_ops = {
 435	.get_drvinfo = ace_get_drvinfo,
 436	.get_link_ksettings = ace_get_link_ksettings,
 437	.set_link_ksettings = ace_set_link_ksettings,
 438};
 439
 440static void ace_watchdog(struct net_device *dev, unsigned int txqueue);
 441
 442static const struct net_device_ops ace_netdev_ops = {
 443	.ndo_open		= ace_open,
 444	.ndo_stop		= ace_close,
 445	.ndo_tx_timeout		= ace_watchdog,
 446	.ndo_get_stats		= ace_get_stats,
 447	.ndo_start_xmit		= ace_start_xmit,
 448	.ndo_set_rx_mode	= ace_set_multicast_list,
 449	.ndo_validate_addr	= eth_validate_addr,
 450	.ndo_set_mac_address	= ace_set_mac_addr,
 451	.ndo_change_mtu		= ace_change_mtu,
 452};
 453
 454static int acenic_probe_one(struct pci_dev *pdev,
 455			    const struct pci_device_id *id)
 456{
 457	struct net_device *dev;
 458	struct ace_private *ap;
 459	static int boards_found;
 460
 461	dev = alloc_etherdev(sizeof(struct ace_private));
 462	if (dev == NULL)
 463		return -ENOMEM;
 464
 465	SET_NETDEV_DEV(dev, &pdev->dev);
 466
 467	ap = netdev_priv(dev);
 468	ap->ndev = dev;
 469	ap->pdev = pdev;
 470	ap->name = pci_name(pdev);
 471
 472	dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
 473	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 474
 475	dev->watchdog_timeo = 5*HZ;
 476	dev->min_mtu = 0;
 477	dev->max_mtu = ACE_JUMBO_MTU;
 478
 479	dev->netdev_ops = &ace_netdev_ops;
 480	dev->ethtool_ops = &ace_ethtool_ops;
 481
 482	/* we only display this string ONCE */
 483	if (!boards_found)
 484		printk(version);
 485
 486	if (pci_enable_device(pdev))
 487		goto fail_free_netdev;
 488
 489	/*
 490	 * Enable master mode before we start playing with the
 491	 * pci_command word since pci_set_master() will modify
 492	 * it.
 493	 */
 494	pci_set_master(pdev);
 495
 496	pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command);
 497
 498	/* OpenFirmware on Mac's does not set this - DOH.. */
 499	if (!(ap->pci_command & PCI_COMMAND_MEMORY)) {
 500		printk(KERN_INFO "%s: Enabling PCI Memory Mapped "
 501		       "access - was not enabled by BIOS/Firmware\n",
 502		       ap->name);
 503		ap->pci_command = ap->pci_command | PCI_COMMAND_MEMORY;
 504		pci_write_config_word(ap->pdev, PCI_COMMAND,
 505				      ap->pci_command);
 506		wmb();
 507	}
 508
 509	pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &ap->pci_latency);
 510	if (ap->pci_latency <= 0x40) {
 511		ap->pci_latency = 0x40;
 512		pci_write_config_byte(pdev, PCI_LATENCY_TIMER, ap->pci_latency);
 513	}
 514
 515	/*
 516	 * Remap the regs into kernel space - this is abuse of
 517	 * dev->base_addr since it was meant for I/O port
 518	 * addresses but who gives a damn.
 519	 */
 520	dev->base_addr = pci_resource_start(pdev, 0);
 521	ap->regs = ioremap(dev->base_addr, 0x4000);
 522	if (!ap->regs) {
 523		printk(KERN_ERR "%s:  Unable to map I/O register, "
 524		       "AceNIC %i will be disabled.\n",
 525		       ap->name, boards_found);
 526		goto fail_free_netdev;
 527	}
 528
 529	switch(pdev->vendor) {
 530	case PCI_VENDOR_ID_ALTEON:
 531		if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T) {
 532			printk(KERN_INFO "%s: Farallon PN9100-T ",
 533			       ap->name);
 534		} else {
 535			printk(KERN_INFO "%s: Alteon AceNIC ",
 536			       ap->name);
 537		}
 538		break;
 539	case PCI_VENDOR_ID_3COM:
 540		printk(KERN_INFO "%s: 3Com 3C985 ", ap->name);
 541		break;
 542	case PCI_VENDOR_ID_NETGEAR:
 543		printk(KERN_INFO "%s: NetGear GA620 ", ap->name);
 544		break;
 545	case PCI_VENDOR_ID_DEC:
 546		if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX) {
 547			printk(KERN_INFO "%s: Farallon PN9000-SX ",
 548			       ap->name);
 549			break;
 550		}
 551		fallthrough;
 552	case PCI_VENDOR_ID_SGI:
 553		printk(KERN_INFO "%s: SGI AceNIC ", ap->name);
 554		break;
 555	default:
 556		printk(KERN_INFO "%s: Unknown AceNIC ", ap->name);
 557		break;
 558	}
 559
 560	printk("Gigabit Ethernet at 0x%08lx, ", dev->base_addr);
 561	printk("irq %d\n", pdev->irq);
 562
 563#ifdef CONFIG_ACENIC_OMIT_TIGON_I
 564	if ((readl(&ap->regs->HostCtrl) >> 28) == 4) {
 565		printk(KERN_ERR "%s: Driver compiled without Tigon I"
 566		       " support - NIC disabled\n", dev->name);
 567		goto fail_uninit;
 568	}
 569#endif
 570
 571	if (ace_allocate_descriptors(dev))
 572		goto fail_free_netdev;
 573
 574#ifdef MODULE
 575	if (boards_found >= ACE_MAX_MOD_PARMS)
 576		ap->board_idx = BOARD_IDX_OVERFLOW;
 577	else
 578		ap->board_idx = boards_found;
 579#else
 580	ap->board_idx = BOARD_IDX_STATIC;
 581#endif
 582
 583	if (ace_init(dev))
 584		goto fail_free_netdev;
 585
 586	if (register_netdev(dev)) {
 587		printk(KERN_ERR "acenic: device registration failed\n");
 588		goto fail_uninit;
 589	}
 590	ap->name = dev->name;
 591
 592	dev->features |= NETIF_F_HIGHDMA;
 593
 594	pci_set_drvdata(pdev, dev);
 595
 596	boards_found++;
 597	return 0;
 598
 599 fail_uninit:
 600	ace_init_cleanup(dev);
 601 fail_free_netdev:
 602	free_netdev(dev);
 603	return -ENODEV;
 604}
 605
 606static void acenic_remove_one(struct pci_dev *pdev)
 607{
 608	struct net_device *dev = pci_get_drvdata(pdev);
 609	struct ace_private *ap = netdev_priv(dev);
 610	struct ace_regs __iomem *regs = ap->regs;
 611	short i;
 612
 613	unregister_netdev(dev);
 614
 615	writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
 616	if (ap->version >= 2)
 617		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 618
 619	/*
 620	 * This clears any pending interrupts
 621	 */
 622	writel(1, &regs->Mb0Lo);
 623	readl(&regs->CpuCtrl);	/* flush */
 624
 625	/*
 626	 * Make sure no other CPUs are processing interrupts
 627	 * on the card before the buffers are being released.
 628	 * Otherwise one might experience some `interesting'
 629	 * effects.
 630	 *
 631	 * Then release the RX buffers - jumbo buffers were
 632	 * already released in ace_close().
 633	 */
 634	ace_sync_irq(dev->irq);
 635
 636	for (i = 0; i < RX_STD_RING_ENTRIES; i++) {
 637		struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb;
 638
 639		if (skb) {
 640			struct ring_info *ringp;
 641			dma_addr_t mapping;
 642
 643			ringp = &ap->skb->rx_std_skbuff[i];
 644			mapping = dma_unmap_addr(ringp, mapping);
 645			dma_unmap_page(&ap->pdev->dev, mapping,
 646				       ACE_STD_BUFSIZE, DMA_FROM_DEVICE);
 647
 648			ap->rx_std_ring[i].size = 0;
 649			ap->skb->rx_std_skbuff[i].skb = NULL;
 650			dev_kfree_skb(skb);
 651		}
 652	}
 653
 654	if (ap->version >= 2) {
 655		for (i = 0; i < RX_MINI_RING_ENTRIES; i++) {
 656			struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb;
 657
 658			if (skb) {
 659				struct ring_info *ringp;
 660				dma_addr_t mapping;
 661
 662				ringp = &ap->skb->rx_mini_skbuff[i];
 663				mapping = dma_unmap_addr(ringp,mapping);
 664				dma_unmap_page(&ap->pdev->dev, mapping,
 665					       ACE_MINI_BUFSIZE,
 666					       DMA_FROM_DEVICE);
 667
 668				ap->rx_mini_ring[i].size = 0;
 669				ap->skb->rx_mini_skbuff[i].skb = NULL;
 670				dev_kfree_skb(skb);
 671			}
 672		}
 673	}
 674
 675	for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) {
 676		struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb;
 677		if (skb) {
 678			struct ring_info *ringp;
 679			dma_addr_t mapping;
 680
 681			ringp = &ap->skb->rx_jumbo_skbuff[i];
 682			mapping = dma_unmap_addr(ringp, mapping);
 683			dma_unmap_page(&ap->pdev->dev, mapping,
 684				       ACE_JUMBO_BUFSIZE, DMA_FROM_DEVICE);
 685
 686			ap->rx_jumbo_ring[i].size = 0;
 687			ap->skb->rx_jumbo_skbuff[i].skb = NULL;
 688			dev_kfree_skb(skb);
 689		}
 690	}
 691
 692	ace_init_cleanup(dev);
 693	free_netdev(dev);
 694}
 695
 696static struct pci_driver acenic_pci_driver = {
 697	.name		= "acenic",
 698	.id_table	= acenic_pci_tbl,
 699	.probe		= acenic_probe_one,
 700	.remove		= acenic_remove_one,
 701};
 702
 703static void ace_free_descriptors(struct net_device *dev)
 704{
 705	struct ace_private *ap = netdev_priv(dev);
 706	int size;
 707
 708	if (ap->rx_std_ring != NULL) {
 709		size = (sizeof(struct rx_desc) *
 710			(RX_STD_RING_ENTRIES +
 711			 RX_JUMBO_RING_ENTRIES +
 712			 RX_MINI_RING_ENTRIES +
 713			 RX_RETURN_RING_ENTRIES));
 714		dma_free_coherent(&ap->pdev->dev, size, ap->rx_std_ring,
 715				  ap->rx_ring_base_dma);
 716		ap->rx_std_ring = NULL;
 717		ap->rx_jumbo_ring = NULL;
 718		ap->rx_mini_ring = NULL;
 719		ap->rx_return_ring = NULL;
 720	}
 721	if (ap->evt_ring != NULL) {
 722		size = (sizeof(struct event) * EVT_RING_ENTRIES);
 723		dma_free_coherent(&ap->pdev->dev, size, ap->evt_ring,
 724				  ap->evt_ring_dma);
 725		ap->evt_ring = NULL;
 726	}
 727	if (ap->tx_ring != NULL && !ACE_IS_TIGON_I(ap)) {
 728		size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES);
 729		dma_free_coherent(&ap->pdev->dev, size, ap->tx_ring,
 730				  ap->tx_ring_dma);
 731	}
 732	ap->tx_ring = NULL;
 733
 734	if (ap->evt_prd != NULL) {
 735		dma_free_coherent(&ap->pdev->dev, sizeof(u32),
 736				  (void *)ap->evt_prd, ap->evt_prd_dma);
 737		ap->evt_prd = NULL;
 738	}
 739	if (ap->rx_ret_prd != NULL) {
 740		dma_free_coherent(&ap->pdev->dev, sizeof(u32),
 741				  (void *)ap->rx_ret_prd, ap->rx_ret_prd_dma);
 742		ap->rx_ret_prd = NULL;
 743	}
 744	if (ap->tx_csm != NULL) {
 745		dma_free_coherent(&ap->pdev->dev, sizeof(u32),
 746				  (void *)ap->tx_csm, ap->tx_csm_dma);
 747		ap->tx_csm = NULL;
 748	}
 749}
 750
 751
 752static int ace_allocate_descriptors(struct net_device *dev)
 753{
 754	struct ace_private *ap = netdev_priv(dev);
 755	int size;
 756
 757	size = (sizeof(struct rx_desc) *
 758		(RX_STD_RING_ENTRIES +
 759		 RX_JUMBO_RING_ENTRIES +
 760		 RX_MINI_RING_ENTRIES +
 761		 RX_RETURN_RING_ENTRIES));
 762
 763	ap->rx_std_ring = dma_alloc_coherent(&ap->pdev->dev, size,
 764					     &ap->rx_ring_base_dma, GFP_KERNEL);
 765	if (ap->rx_std_ring == NULL)
 766		goto fail;
 767
 768	ap->rx_jumbo_ring = ap->rx_std_ring + RX_STD_RING_ENTRIES;
 769	ap->rx_mini_ring = ap->rx_jumbo_ring + RX_JUMBO_RING_ENTRIES;
 770	ap->rx_return_ring = ap->rx_mini_ring + RX_MINI_RING_ENTRIES;
 771
 772	size = (sizeof(struct event) * EVT_RING_ENTRIES);
 773
 774	ap->evt_ring = dma_alloc_coherent(&ap->pdev->dev, size,
 775					  &ap->evt_ring_dma, GFP_KERNEL);
 776
 777	if (ap->evt_ring == NULL)
 778		goto fail;
 779
 780	/*
 781	 * Only allocate a host TX ring for the Tigon II, the Tigon I
 782	 * has to use PCI registers for this ;-(
 783	 */
 784	if (!ACE_IS_TIGON_I(ap)) {
 785		size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES);
 786
 787		ap->tx_ring = dma_alloc_coherent(&ap->pdev->dev, size,
 788						 &ap->tx_ring_dma, GFP_KERNEL);
 789
 790		if (ap->tx_ring == NULL)
 791			goto fail;
 792	}
 793
 794	ap->evt_prd = dma_alloc_coherent(&ap->pdev->dev, sizeof(u32),
 795					 &ap->evt_prd_dma, GFP_KERNEL);
 796	if (ap->evt_prd == NULL)
 797		goto fail;
 798
 799	ap->rx_ret_prd = dma_alloc_coherent(&ap->pdev->dev, sizeof(u32),
 800					    &ap->rx_ret_prd_dma, GFP_KERNEL);
 801	if (ap->rx_ret_prd == NULL)
 802		goto fail;
 803
 804	ap->tx_csm = dma_alloc_coherent(&ap->pdev->dev, sizeof(u32),
 805					&ap->tx_csm_dma, GFP_KERNEL);
 806	if (ap->tx_csm == NULL)
 807		goto fail;
 808
 809	return 0;
 810
 811fail:
 812	/* Clean up. */
 813	ace_init_cleanup(dev);
 814	return 1;
 815}
 816
 817
 818/*
 819 * Generic cleanup of data allocated during init. Used when the
 820 * module is unloaded or if an error occurs during initialization
 821 */
 822static void ace_init_cleanup(struct net_device *dev)
 823{
 824	struct ace_private *ap;
 825
 826	ap = netdev_priv(dev);
 827
 828	ace_free_descriptors(dev);
 829
 830	if (ap->info)
 831		dma_free_coherent(&ap->pdev->dev, sizeof(struct ace_info),
 832				  ap->info, ap->info_dma);
 833	kfree(ap->skb);
 834	kfree(ap->trace_buf);
 835
 836	if (dev->irq)
 837		free_irq(dev->irq, dev);
 838
 839	iounmap(ap->regs);
 840}
 841
 842
 843/*
 844 * Commands are considered to be slow.
 845 */
 846static inline void ace_issue_cmd(struct ace_regs __iomem *regs, struct cmd *cmd)
 847{
 848	u32 idx;
 849
 850	idx = readl(&regs->CmdPrd);
 851
 852	writel(*(u32 *)(cmd), &regs->CmdRng[idx]);
 853	idx = (idx + 1) % CMD_RING_ENTRIES;
 854
 855	writel(idx, &regs->CmdPrd);
 856}
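/*
 * Editor's note: an illustrative caller sketch, mirroring the actual use of
 * ace_issue_cmd() in ace_load_std_rx_ring() further down in this file. It
 * posts the new standard-ring producer index to the NIC on Tigon I cards:
 *
 *	struct cmd cmd;
 *
 *	cmd.evt = C_SET_RX_PRD_IDX;
 *	cmd.code = 0;
 *	cmd.idx = ap->rx_std_skbprd;
 *	ace_issue_cmd(regs, &cmd);
 */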
 857
 858
 859static int ace_init(struct net_device *dev)
 860{
 861	struct ace_private *ap;
 862	struct ace_regs __iomem *regs;
 863	struct ace_info *info = NULL;
 864	struct pci_dev *pdev;
 865	unsigned long myjif;
 866	u64 tmp_ptr;
 867	u32 tig_ver, mac1, mac2, tmp, pci_state;
 868	int board_idx, ecode = 0;
 869	short i;
 870	unsigned char cache_size;
 871	u8 addr[ETH_ALEN];
 872
 873	ap = netdev_priv(dev);
 874	regs = ap->regs;
 875
 876	board_idx = ap->board_idx;
 877
 878	/*
 879	 * aman@sgi.com - it's useful to do a NIC reset here to
 880	 * address the `Firmware not running' problem subsequent
 881	 * to any crashes involving the NIC
 882	 */
 883	writel(HW_RESET | (HW_RESET << 24), &regs->HostCtrl);
 884	readl(&regs->HostCtrl);		/* PCI write posting */
 885	udelay(5);
 886
 887	/*
 888	 * Don't access any other registers before this point!
 889	 */
 890#ifdef __BIG_ENDIAN
 891	/*
 892	 * This will most likely need BYTE_SWAP once we switch
 893	 * to using __raw_writel()
 894	 */
 895	writel((WORD_SWAP | CLR_INT | ((WORD_SWAP | CLR_INT) << 24)),
 896	       &regs->HostCtrl);
 897#else
 898	writel((CLR_INT | WORD_SWAP | ((CLR_INT | WORD_SWAP) << 24)),
 899	       &regs->HostCtrl);
 900#endif
 901	readl(&regs->HostCtrl);		/* PCI write posting */
 902
 903	/*
 904	 * Stop the NIC CPU and clear pending interrupts
 905	 */
 906	writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
 907	readl(&regs->CpuCtrl);		/* PCI write posting */
 908	writel(0, &regs->Mb0Lo);
 909
 910	tig_ver = readl(&regs->HostCtrl) >> 28;
 911
 912	switch(tig_ver){
 913#ifndef CONFIG_ACENIC_OMIT_TIGON_I
 914	case 4:
 915	case 5:
 916		printk(KERN_INFO "  Tigon I  (Rev. %i), Firmware: %i.%i.%i, ",
 917		       tig_ver, ap->firmware_major, ap->firmware_minor,
 918		       ap->firmware_fix);
 919		writel(0, &regs->LocalCtrl);
 920		ap->version = 1;
 921		ap->tx_ring_entries = TIGON_I_TX_RING_ENTRIES;
 922		break;
 923#endif
 924	case 6:
 925		printk(KERN_INFO "  Tigon II (Rev. %i), Firmware: %i.%i.%i, ",
 926		       tig_ver, ap->firmware_major, ap->firmware_minor,
 927		       ap->firmware_fix);
 928		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 929		readl(&regs->CpuBCtrl);		/* PCI write posting */
 930		/*
 931		 * The SRAM bank size does _not_ indicate the amount
 932		 * of memory on the card, it controls the _bank_ size!
 933		 * Ie. a 1MB AceNIC will have two banks of 512KB.
 934		 */
 935		writel(SRAM_BANK_512K, &regs->LocalCtrl);
 936		writel(SYNC_SRAM_TIMING, &regs->MiscCfg);
 937		ap->version = 2;
 938		ap->tx_ring_entries = MAX_TX_RING_ENTRIES;
 939		break;
 940	default:
 941		printk(KERN_WARNING "  Unsupported Tigon version detected "
 942		       "(%i)\n", tig_ver);
 943		ecode = -ENODEV;
 944		goto init_error;
 945	}
 946
 947	/*
 948	 * ModeStat _must_ be set after the SRAM settings as this change
 949	 * seems to corrupt the ModeStat and possibly other registers.
 950	 * The SRAM settings survive resets and setting it to the same
 951	 * value a second time works as well. This is what caused the
 952	 * `Firmware not running' problem on the Tigon II.
 953	 */
 954#ifdef __BIG_ENDIAN
 955	writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL | ACE_BYTE_SWAP_BD |
 956	       ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
 957#else
 958	writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL |
 959	       ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
 960#endif
 961	readl(&regs->ModeStat);		/* PCI write posting */
 962
 963	mac1 = 0;
 964	for(i = 0; i < 4; i++) {
 965		int t;
 966
 967		mac1 = mac1 << 8;
 968		t = read_eeprom_byte(dev, 0x8c+i);
 969		if (t < 0) {
 970			ecode = -EIO;
 971			goto init_error;
 972		} else
 973			mac1 |= (t & 0xff);
 974	}
 975	mac2 = 0;
 976	for(i = 4; i < 8; i++) {
 977		int t;
 978
 979		mac2 = mac2 << 8;
 980		t = read_eeprom_byte(dev, 0x8c+i);
 981		if (t < 0) {
 982			ecode = -EIO;
 983			goto init_error;
 984		} else
 985			mac2 |= (t & 0xff);
 986	}
 987
 988	writel(mac1, &regs->MacAddrHi);
 989	writel(mac2, &regs->MacAddrLo);
 990
 991	addr[0] = (mac1 >> 8) & 0xff;
 992	addr[1] = mac1 & 0xff;
 993	addr[2] = (mac2 >> 24) & 0xff;
 994	addr[3] = (mac2 >> 16) & 0xff;
 995	addr[4] = (mac2 >> 8) & 0xff;
 996	addr[5] = mac2 & 0xff;
 997	eth_hw_addr_set(dev, addr);
 998
 999	printk("MAC: %pM\n", dev->dev_addr);
1000
1001	/*
1002	 * Looks like this is necessary to deal with on all architectures,
1003	 * even this %$#%$# N440BX Intel based thing doesn't get it right.
1004	 * Ie. having two NICs in the machine, one will have the cache
1005	 * line set at boot time, the other will not.
1006	 */
1007	pdev = ap->pdev;
1008	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_size);
1009	cache_size <<= 2;
1010	if (cache_size != SMP_CACHE_BYTES) {
1011		printk(KERN_INFO "  PCI cache line size set incorrectly "
1012		       "(%i bytes) by BIOS/FW, ", cache_size);
1013		if (cache_size > SMP_CACHE_BYTES)
1014			printk("expecting %i\n", SMP_CACHE_BYTES);
1015		else {
1016			printk("correcting to %i\n", SMP_CACHE_BYTES);
1017			pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE,
1018					      SMP_CACHE_BYTES >> 2);
1019		}
1020	}
1021
1022	pci_state = readl(&regs->PciState);
1023	printk(KERN_INFO "  PCI bus width: %i bits, speed: %iMHz, "
1024	       "latency: %i clks\n",
1025	       	(pci_state & PCI_32BIT) ? 32 : 64,
1026		(pci_state & PCI_66MHZ) ? 66 : 33,
1027		ap->pci_latency);
1028
1029	/*
1030	 * Set the max DMA transfer size. Seems that for most systems
1031	 * the performance is better when no MAX parameter is
1032	 * set. However for systems enabling PCI write and invalidate,
1033	 * DMA writes must be set to the L1 cache line size to get
1034	 * optimal performance.
1035	 *
1036	 * The default is now to turn the PCI write and invalidate off
1037	 * - that is what Alteon does for NT.
1038	 */
1039	tmp = READ_CMD_MEM | WRITE_CMD_MEM;
1040	if (ap->version >= 2) {
1041		tmp |= (MEM_READ_MULTIPLE | (pci_state & PCI_66MHZ));
1042		/*
1043		 * Tuning parameters only supported for 8 cards
1044		 */
1045		if (board_idx == BOARD_IDX_OVERFLOW ||
1046		    dis_pci_mem_inval[board_idx]) {
1047			if (ap->pci_command & PCI_COMMAND_INVALIDATE) {
1048				ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
1049				pci_write_config_word(pdev, PCI_COMMAND,
1050						      ap->pci_command);
1051				printk(KERN_INFO "  Disabling PCI memory "
1052				       "write and invalidate\n");
1053			}
1054		} else if (ap->pci_command & PCI_COMMAND_INVALIDATE) {
1055			printk(KERN_INFO "  PCI memory write & invalidate "
1056			       "enabled by BIOS, enabling counter measures\n");
1057
1058			switch(SMP_CACHE_BYTES) {
1059			case 16:
1060				tmp |= DMA_WRITE_MAX_16;
1061				break;
1062			case 32:
1063				tmp |= DMA_WRITE_MAX_32;
1064				break;
1065			case 64:
1066				tmp |= DMA_WRITE_MAX_64;
1067				break;
1068			case 128:
1069				tmp |= DMA_WRITE_MAX_128;
1070				break;
1071			default:
1072				printk(KERN_INFO "  Cache line size %i not "
1073				       "supported, PCI write and invalidate "
1074				       "disabled\n", SMP_CACHE_BYTES);
1075				ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
1076				pci_write_config_word(pdev, PCI_COMMAND,
1077						      ap->pci_command);
1078			}
1079		}
1080	}
1081
1082#ifdef __sparc__
1083	/*
1084	 * On this platform, we know what the best dma settings
1085	 * are.  We use 64-byte maximum bursts, because if we
1086	 * burst larger than the cache line size (or even cross
1087	 * a 64byte boundary in a single burst) the UltraSparc
1088	 * PCI controller will disconnect at 64-byte multiples.
1089	 *
1090	 * Read-multiple will be properly enabled above, and when
1091	 * set will give the PCI controller proper hints about
1092	 * prefetching.
1093	 */
1094	tmp &= ~DMA_READ_WRITE_MASK;
1095	tmp |= DMA_READ_MAX_64;
1096	tmp |= DMA_WRITE_MAX_64;
1097#endif
1098#ifdef __alpha__
1099	tmp &= ~DMA_READ_WRITE_MASK;
1100	tmp |= DMA_READ_MAX_128;
1101	/*
1102	 * All the docs say MUST NOT. Well, I did.
1103	 * Nothing terrible happens, if we load wrong size.
1104	 * Bit w&i still works better!
1105	 */
1106	tmp |= DMA_WRITE_MAX_128;
1107#endif
1108	writel(tmp, &regs->PciState);
1109
1110#if 0
1111	/*
1112	 * The Host PCI bus controller driver has to set FBB.
1113	 * If all devices on that PCI bus support FBB, then the controller
1114	 * can enable FBB support in the Host PCI Bus controller (or on
1115	 * the PCI-PCI bridge if that applies).
1116	 * -ggg
1117	 */
1118	/*
1119	 * I have received reports from people having problems when this
1120	 * bit is enabled.
1121	 */
1122	if (!(ap->pci_command & PCI_COMMAND_FAST_BACK)) {
1123		printk(KERN_INFO "  Enabling PCI Fast Back to Back\n");
1124		ap->pci_command |= PCI_COMMAND_FAST_BACK;
1125		pci_write_config_word(pdev, PCI_COMMAND, ap->pci_command);
1126	}
1127#endif
1128
1129	/*
1130	 * Configure DMA attributes.
1131	 */
1132	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
1133		ecode = -ENODEV;
1134		goto init_error;
1135	}
1136
1137	/*
1138	 * Initialize the generic info block and the command+event rings
1139	 * and the control blocks for the transmit and receive rings
1140	 * as they need to be setup once and for all.
1141	 */
1142	if (!(info = dma_alloc_coherent(&ap->pdev->dev, sizeof(struct ace_info),
1143					&ap->info_dma, GFP_KERNEL))) {
1144		ecode = -EAGAIN;
1145		goto init_error;
1146	}
1147	ap->info = info;
1148
1149	/*
1150	 * Get the memory for the skb rings.
1151	 */
1152	if (!(ap->skb = kzalloc(sizeof(struct ace_skb), GFP_KERNEL))) {
1153		ecode = -EAGAIN;
1154		goto init_error;
1155	}
1156
1157	ecode = request_irq(pdev->irq, ace_interrupt, IRQF_SHARED,
1158			    DRV_NAME, dev);
1159	if (ecode) {
1160		printk(KERN_WARNING "%s: Requested IRQ %d is busy\n",
1161		       DRV_NAME, pdev->irq);
1162		goto init_error;
1163	} else
1164		dev->irq = pdev->irq;
1165
1166#ifdef INDEX_DEBUG
1167	spin_lock_init(&ap->debug_lock);
1168	ap->last_tx = ACE_TX_RING_ENTRIES(ap) - 1;
1169	ap->last_std_rx = 0;
1170	ap->last_mini_rx = 0;
1171#endif
1172
1173	ecode = ace_load_firmware(dev);
1174	if (ecode)
1175		goto init_error;
1176
1177	ap->fw_running = 0;
1178
1179	tmp_ptr = ap->info_dma;
1180	writel(tmp_ptr >> 32, &regs->InfoPtrHi);
1181	writel(tmp_ptr & 0xffffffff, &regs->InfoPtrLo);
1182
1183	memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event));
1184
1185	set_aceaddr(&info->evt_ctrl.rngptr, ap->evt_ring_dma);
1186	info->evt_ctrl.flags = 0;
1187
1188	*(ap->evt_prd) = 0;
1189	wmb();
1190	set_aceaddr(&info->evt_prd_ptr, ap->evt_prd_dma);
1191	writel(0, &regs->EvtCsm);
1192
1193	set_aceaddr(&info->cmd_ctrl.rngptr, 0x100);
1194	info->cmd_ctrl.flags = 0;
1195	info->cmd_ctrl.max_len = 0;
1196
1197	for (i = 0; i < CMD_RING_ENTRIES; i++)
1198		writel(0, &regs->CmdRng[i]);
1199
1200	writel(0, &regs->CmdPrd);
1201	writel(0, &regs->CmdCsm);
1202
1203	tmp_ptr = ap->info_dma;
1204	tmp_ptr += (unsigned long) &(((struct ace_info *)0)->s.stats);
1205	set_aceaddr(&info->stats2_ptr, (dma_addr_t) tmp_ptr);
1206
1207	set_aceaddr(&info->rx_std_ctrl.rngptr, ap->rx_ring_base_dma);
1208	info->rx_std_ctrl.max_len = ACE_STD_BUFSIZE;
1209	info->rx_std_ctrl.flags =
1210	  RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1211
1212	memset(ap->rx_std_ring, 0,
1213	       RX_STD_RING_ENTRIES * sizeof(struct rx_desc));
1214
1215	for (i = 0; i < RX_STD_RING_ENTRIES; i++)
1216		ap->rx_std_ring[i].flags = BD_FLG_TCP_UDP_SUM;
1217
1218	ap->rx_std_skbprd = 0;
1219	atomic_set(&ap->cur_rx_bufs, 0);
1220
1221	set_aceaddr(&info->rx_jumbo_ctrl.rngptr,
1222		    (ap->rx_ring_base_dma +
1223		     (sizeof(struct rx_desc) * RX_STD_RING_ENTRIES)));
1224	info->rx_jumbo_ctrl.max_len = 0;
1225	info->rx_jumbo_ctrl.flags =
1226	  RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1227
1228	memset(ap->rx_jumbo_ring, 0,
1229	       RX_JUMBO_RING_ENTRIES * sizeof(struct rx_desc));
1230
1231	for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++)
1232		ap->rx_jumbo_ring[i].flags = BD_FLG_TCP_UDP_SUM | BD_FLG_JUMBO;
1233
1234	ap->rx_jumbo_skbprd = 0;
1235	atomic_set(&ap->cur_jumbo_bufs, 0);
1236
1237	memset(ap->rx_mini_ring, 0,
1238	       RX_MINI_RING_ENTRIES * sizeof(struct rx_desc));
1239
1240	if (ap->version >= 2) {
1241		set_aceaddr(&info->rx_mini_ctrl.rngptr,
1242			    (ap->rx_ring_base_dma +
1243			     (sizeof(struct rx_desc) *
1244			      (RX_STD_RING_ENTRIES +
1245			       RX_JUMBO_RING_ENTRIES))));
1246		info->rx_mini_ctrl.max_len = ACE_MINI_SIZE;
1247		info->rx_mini_ctrl.flags =
1248		  RCB_FLG_TCP_UDP_SUM|RCB_FLG_NO_PSEUDO_HDR|RCB_FLG_VLAN_ASSIST;
1249
1250		for (i = 0; i < RX_MINI_RING_ENTRIES; i++)
1251			ap->rx_mini_ring[i].flags =
1252				BD_FLG_TCP_UDP_SUM | BD_FLG_MINI;
1253	} else {
1254		set_aceaddr(&info->rx_mini_ctrl.rngptr, 0);
1255		info->rx_mini_ctrl.flags = RCB_FLG_RNG_DISABLE;
1256		info->rx_mini_ctrl.max_len = 0;
1257	}
1258
1259	ap->rx_mini_skbprd = 0;
1260	atomic_set(&ap->cur_mini_bufs, 0);
1261
1262	set_aceaddr(&info->rx_return_ctrl.rngptr,
1263		    (ap->rx_ring_base_dma +
1264		     (sizeof(struct rx_desc) *
1265		      (RX_STD_RING_ENTRIES +
1266		       RX_JUMBO_RING_ENTRIES +
1267		       RX_MINI_RING_ENTRIES))));
1268	info->rx_return_ctrl.flags = 0;
1269	info->rx_return_ctrl.max_len = RX_RETURN_RING_ENTRIES;
1270
1271	memset(ap->rx_return_ring, 0,
1272	       RX_RETURN_RING_ENTRIES * sizeof(struct rx_desc));
1273
1274	set_aceaddr(&info->rx_ret_prd_ptr, ap->rx_ret_prd_dma);
1275	*(ap->rx_ret_prd) = 0;
1276
1277	writel(TX_RING_BASE, &regs->WinBase);
1278
1279	if (ACE_IS_TIGON_I(ap)) {
1280		ap->tx_ring = (__force struct tx_desc *) regs->Window;
1281		for (i = 0; i < (TIGON_I_TX_RING_ENTRIES
1282				 * sizeof(struct tx_desc)) / sizeof(u32); i++)
1283			writel(0, (__force void __iomem *)ap->tx_ring  + i * 4);
1284
1285		set_aceaddr(&info->tx_ctrl.rngptr, TX_RING_BASE);
1286	} else {
1287		memset(ap->tx_ring, 0,
1288		       MAX_TX_RING_ENTRIES * sizeof(struct tx_desc));
1289
1290		set_aceaddr(&info->tx_ctrl.rngptr, ap->tx_ring_dma);
1291	}
1292
1293	info->tx_ctrl.max_len = ACE_TX_RING_ENTRIES(ap);
1294	tmp = RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1295
1296	/*
1297	 * The Tigon I does not like having the TX ring in host memory ;-(
1298	 */
1299	if (!ACE_IS_TIGON_I(ap))
1300		tmp |= RCB_FLG_TX_HOST_RING;
1301#if TX_COAL_INTS_ONLY
1302	tmp |= RCB_FLG_COAL_INT_ONLY;
1303#endif
1304	info->tx_ctrl.flags = tmp;
1305
1306	set_aceaddr(&info->tx_csm_ptr, ap->tx_csm_dma);
1307
1308	/*
1309	 * Potential item for tuning parameter
1310	 */
1311#if 0 /* NO */
1312	writel(DMA_THRESH_16W, &regs->DmaReadCfg);
1313	writel(DMA_THRESH_16W, &regs->DmaWriteCfg);
1314#else
1315	writel(DMA_THRESH_8W, &regs->DmaReadCfg);
1316	writel(DMA_THRESH_8W, &regs->DmaWriteCfg);
1317#endif
1318
1319	writel(0, &regs->MaskInt);
1320	writel(1, &regs->IfIdx);
1321#if 0
1322	/*
1323	 * McKinley boxes do not like us fiddling with AssistState
1324	 * this early
1325	 */
1326	writel(1, &regs->AssistState);
1327#endif
1328
1329	writel(DEF_STAT, &regs->TuneStatTicks);
1330	writel(DEF_TRACE, &regs->TuneTrace);
1331
1332	ace_set_rxtx_parms(dev, 0);
1333
1334	if (board_idx == BOARD_IDX_OVERFLOW) {
1335		printk(KERN_WARNING "%s: more than %i NICs detected, "
1336		       "ignoring module parameters!\n",
1337		       ap->name, ACE_MAX_MOD_PARMS);
1338	} else if (board_idx >= 0) {
1339		if (tx_coal_tick[board_idx])
1340			writel(tx_coal_tick[board_idx],
1341			       &regs->TuneTxCoalTicks);
1342		if (max_tx_desc[board_idx])
1343			writel(max_tx_desc[board_idx], &regs->TuneMaxTxDesc);
1344
1345		if (rx_coal_tick[board_idx])
1346			writel(rx_coal_tick[board_idx],
1347			       &regs->TuneRxCoalTicks);
1348		if (max_rx_desc[board_idx])
1349			writel(max_rx_desc[board_idx], &regs->TuneMaxRxDesc);
1350
1351		if (trace[board_idx])
1352			writel(trace[board_idx], &regs->TuneTrace);
1353
1354		if ((tx_ratio[board_idx] > 0) && (tx_ratio[board_idx] < 64))
1355			writel(tx_ratio[board_idx], &regs->TxBufRat);
1356	}
1357
1358	/*
1359	 * Default link parameters
1360	 */
1361	tmp = LNK_ENABLE | LNK_FULL_DUPLEX | LNK_1000MB | LNK_100MB |
1362		LNK_10MB | LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL | LNK_NEGOTIATE;
1363	if(ap->version >= 2)
1364		tmp |= LNK_TX_FLOW_CTL_Y;
1365
1366	/*
1367	 * Override link default parameters
1368	 */
1369	if ((board_idx >= 0) && link_state[board_idx]) {
1370		int option = link_state[board_idx];
1371
1372		tmp = LNK_ENABLE;
1373
1374		if (option & 0x01) {
1375			printk(KERN_INFO "%s: Setting half duplex link\n",
1376			       ap->name);
1377			tmp &= ~LNK_FULL_DUPLEX;
1378		}
1379		if (option & 0x02)
1380			tmp &= ~LNK_NEGOTIATE;
1381		if (option & 0x10)
1382			tmp |= LNK_10MB;
1383		if (option & 0x20)
1384			tmp |= LNK_100MB;
1385		if (option & 0x40)
1386			tmp |= LNK_1000MB;
1387		if ((option & 0x70) == 0) {
1388			printk(KERN_WARNING "%s: No media speed specified, "
1389			       "forcing auto negotiation\n", ap->name);
1390			tmp |= LNK_NEGOTIATE | LNK_1000MB |
1391				LNK_100MB | LNK_10MB;
1392		}
1393		if ((option & 0x100) == 0)
1394			tmp |= LNK_NEG_FCTL;
1395		else
1396			printk(KERN_INFO "%s: Disabling flow control "
1397			       "negotiation\n", ap->name);
1398		if (option & 0x200)
1399			tmp |= LNK_RX_FLOW_CTL_Y;
1400		if ((option & 0x400) && (ap->version >= 2)) {
1401			printk(KERN_INFO "%s: Enabling TX flow control\n",
1402			       ap->name);
1403			tmp |= LNK_TX_FLOW_CTL_Y;
1404		}
1405	}
1406
1407	ap->link = tmp;
1408	writel(tmp, &regs->TuneLink);
1409	if (ap->version >= 2)
1410		writel(tmp, &regs->TuneFastLink);
1411
1412	writel(ap->firmware_start, &regs->Pc);
1413
1414	writel(0, &regs->Mb0Lo);
1415
1416	/*
1417	 * Set tx_csm before we start receiving interrupts, otherwise
1418	 * the interrupt handler might think it is supposed to process
1419	 * tx ints before we are up and running, which may cause a null
1420	 * pointer access in the int handler.
1421	 */
1422	ap->cur_rx = 0;
1423	ap->tx_prd = *(ap->tx_csm) = ap->tx_ret_csm = 0;
1424
1425	wmb();
1426	ace_set_txprd(regs, ap, 0);
1427	writel(0, &regs->RxRetCsm);
1428
1429	/*
1430	 * Enable DMA engine now.
1431	 * If we do this sooner, Mckinley box pukes.
1432	 * I assume it's because Tigon II DMA engine wants to check
1433	 * *something* even before the CPU is started.
1434	 */
1435	writel(1, &regs->AssistState);  /* enable DMA */
1436
1437	/*
1438	 * Start the NIC CPU
1439	 */
1440	writel(readl(&regs->CpuCtrl) & ~(CPU_HALT|CPU_TRACE), &regs->CpuCtrl);
1441	readl(&regs->CpuCtrl);
1442
1443	/*
1444	 * Wait for the firmware to spin up - max 3 seconds.
1445	 */
1446	myjif = jiffies + 3 * HZ;
1447	while (time_before(jiffies, myjif) && !ap->fw_running)
1448		cpu_relax();
1449
1450	if (!ap->fw_running) {
1451		printk(KERN_ERR "%s: Firmware NOT running!\n", ap->name);
1452
1453		ace_dump_trace(ap);
1454		writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
1455		readl(&regs->CpuCtrl);
1456
1457		/* aman@sgi.com - account for badly behaving firmware/NIC:
1458		 * - have observed that the NIC may continue to generate
1459		 *   interrupts for some reason; attempt to stop it - halt
1460		 *   second CPU for Tigon II cards, and also clear Mb0
1461		 * - if we're a module, we'll fail to load if this was
1462		 *   the only GbE card in the system => if the kernel does
1463		 *   see an interrupt from the NIC, code to handle it is
1464		 *   gone and OOps! - so free_irq also
1465		 */
1466		if (ap->version >= 2)
1467			writel(readl(&regs->CpuBCtrl) | CPU_HALT,
1468			       &regs->CpuBCtrl);
1469		writel(0, &regs->Mb0Lo);
1470		readl(&regs->Mb0Lo);
1471
1472		ecode = -EBUSY;
1473		goto init_error;
1474	}
1475
1476	/*
1477	 * We load the ring here as there seems to be no way to tell the
1478	 * firmware to wipe the ring without re-initializing it.
1479	 */
1480	if (!test_and_set_bit(0, &ap->std_refill_busy))
1481		ace_load_std_rx_ring(dev, RX_RING_SIZE);
1482	else
1483		printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n",
1484		       ap->name);
1485	if (ap->version >= 2) {
1486		if (!test_and_set_bit(0, &ap->mini_refill_busy))
1487			ace_load_mini_rx_ring(dev, RX_MINI_SIZE);
1488		else
1489			printk(KERN_ERR "%s: Someone is busy refilling "
1490			       "the RX mini ring\n", ap->name);
1491	}
1492	return 0;
1493
1494 init_error:
1495	ace_init_cleanup(dev);
1496	return ecode;
1497}
1498
1499
1500static void ace_set_rxtx_parms(struct net_device *dev, int jumbo)
1501{
1502	struct ace_private *ap = netdev_priv(dev);
1503	struct ace_regs __iomem *regs = ap->regs;
1504	int board_idx = ap->board_idx;
1505
1506	if (board_idx >= 0) {
1507		if (!jumbo) {
1508			if (!tx_coal_tick[board_idx])
1509				writel(DEF_TX_COAL, &regs->TuneTxCoalTicks);
1510			if (!max_tx_desc[board_idx])
1511				writel(DEF_TX_MAX_DESC, &regs->TuneMaxTxDesc);
1512			if (!rx_coal_tick[board_idx])
1513				writel(DEF_RX_COAL, &regs->TuneRxCoalTicks);
1514			if (!max_rx_desc[board_idx])
1515				writel(DEF_RX_MAX_DESC, &regs->TuneMaxRxDesc);
1516			if (!tx_ratio[board_idx])
1517				writel(DEF_TX_RATIO, &regs->TxBufRat);
1518		} else {
1519			if (!tx_coal_tick[board_idx])
1520				writel(DEF_JUMBO_TX_COAL,
1521				       &regs->TuneTxCoalTicks);
1522			if (!max_tx_desc[board_idx])
1523				writel(DEF_JUMBO_TX_MAX_DESC,
1524				       &regs->TuneMaxTxDesc);
1525			if (!rx_coal_tick[board_idx])
1526				writel(DEF_JUMBO_RX_COAL,
1527				       &regs->TuneRxCoalTicks);
1528			if (!max_rx_desc[board_idx])
1529				writel(DEF_JUMBO_RX_MAX_DESC,
1530				       &regs->TuneMaxRxDesc);
1531			if (!tx_ratio[board_idx])
1532				writel(DEF_JUMBO_TX_RATIO, &regs->TxBufRat);
1533		}
1534	}
1535}
1536
1537
1538static void ace_watchdog(struct net_device *data, unsigned int txqueue)
1539{
1540	struct net_device *dev = data;
1541	struct ace_private *ap = netdev_priv(dev);
1542	struct ace_regs __iomem *regs = ap->regs;
1543
1544	/*
1545	 * We haven't received a stats update event for more than 2.5
1546	 * seconds and there is data in the transmit queue, thus we
1547	 * assume the card is stuck.
1548	 */
1549	if (*ap->tx_csm != ap->tx_ret_csm) {
1550		printk(KERN_WARNING "%s: Transmitter is stuck, %08x\n",
1551		       dev->name, (unsigned int)readl(&regs->HostCtrl));
1552		/* This can happen due to ieee flow control. */
1553	} else {
1554		printk(KERN_DEBUG "%s: BUG... transmitter died. Kicking it.\n",
1555		       dev->name);
1556#if 0
1557		netif_wake_queue(dev);
1558#endif
1559	}
1560}
1561
1562
1563static void ace_tasklet(struct tasklet_struct *t)
1564{
1565	struct ace_private *ap = from_tasklet(ap, t, ace_tasklet);
1566	struct net_device *dev = ap->ndev;
1567	int cur_size;
1568
1569	cur_size = atomic_read(&ap->cur_rx_bufs);
1570	if ((cur_size < RX_LOW_STD_THRES) &&
1571	    !test_and_set_bit(0, &ap->std_refill_busy)) {
1572#ifdef DEBUG
1573		printk("refilling buffers (current %i)\n", cur_size);
1574#endif
1575		ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size);
1576	}
1577
1578	if (ap->version >= 2) {
1579		cur_size = atomic_read(&ap->cur_mini_bufs);
1580		if ((cur_size < RX_LOW_MINI_THRES) &&
1581		    !test_and_set_bit(0, &ap->mini_refill_busy)) {
1582#ifdef DEBUG
1583			printk("refilling mini buffers (current %i)\n",
1584			       cur_size);
1585#endif
1586			ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size);
1587		}
1588	}
1589
1590	cur_size = atomic_read(&ap->cur_jumbo_bufs);
1591	if (ap->jumbo && (cur_size < RX_LOW_JUMBO_THRES) &&
1592	    !test_and_set_bit(0, &ap->jumbo_refill_busy)) {
1593#ifdef DEBUG
1594		printk("refilling jumbo buffers (current %i)\n", cur_size);
1595#endif
1596		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size);
1597	}
1598	ap->tasklet_pending = 0;
1599}
1600
1601
1602/*
1603 * Copy the contents of the NIC's trace buffer to kernel memory.
1604 */
1605static void ace_dump_trace(struct ace_private *ap)
1606{
1607#if 0
1608	if (!ap->trace_buf)
1609		if (!(ap->trace_buf = kmalloc(ACE_TRACE_SIZE, GFP_KERNEL)))
1610		    return;
1611#endif
1612}
1613
1614
1615/*
1616 * Load the standard rx ring.
1617 *
1618 * Loading rings is safe without holding the spin lock since this is
1619 * done only before the device is enabled, thus no interrupts are
1620 * generated, and afterwards it is only done by the interrupt handler/tasklet handler.
1621 */
1622static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs)
1623{
1624	struct ace_private *ap = netdev_priv(dev);
1625	struct ace_regs __iomem *regs = ap->regs;
1626	short i, idx;
1627
1628
1629	prefetchw(&ap->cur_rx_bufs);
1630
1631	idx = ap->rx_std_skbprd;
1632
1633	for (i = 0; i < nr_bufs; i++) {
1634		struct sk_buff *skb;
1635		struct rx_desc *rd;
1636		dma_addr_t mapping;
1637
1638		skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE);
1639		if (!skb)
1640			break;
1641
1642		mapping = dma_map_page(&ap->pdev->dev,
1643				       virt_to_page(skb->data),
1644				       offset_in_page(skb->data),
1645				       ACE_STD_BUFSIZE, DMA_FROM_DEVICE);
1646		ap->skb->rx_std_skbuff[idx].skb = skb;
1647		dma_unmap_addr_set(&ap->skb->rx_std_skbuff[idx],
1648				   mapping, mapping);
1649
1650		rd = &ap->rx_std_ring[idx];
1651		set_aceaddr(&rd->addr, mapping);
1652		rd->size = ACE_STD_BUFSIZE;
1653		rd->idx = idx;
1654		idx = (idx + 1) % RX_STD_RING_ENTRIES;
1655	}
1656
1657	if (!i)
1658		goto error_out;
1659
1660	atomic_add(i, &ap->cur_rx_bufs);
1661	ap->rx_std_skbprd = idx;
1662
1663	if (ACE_IS_TIGON_I(ap)) {
1664		struct cmd cmd;
1665		cmd.evt = C_SET_RX_PRD_IDX;
1666		cmd.code = 0;
1667		cmd.idx = ap->rx_std_skbprd;
1668		ace_issue_cmd(regs, &cmd);
1669	} else {
1670		writel(idx, &regs->RxStdPrd);
1671		wmb();
1672	}
1673
1674 out:
1675	clear_bit(0, &ap->std_refill_busy);
1676	return;
1677
1678 error_out:
1679	printk(KERN_INFO "Out of memory when allocating "
1680	       "standard receive buffers\n");
1681	goto out;
1682}
1683
1684
1685static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs)
1686{
1687	struct ace_private *ap = netdev_priv(dev);
1688	struct ace_regs __iomem *regs = ap->regs;
1689	short i, idx;
1690
1691	prefetchw(&ap->cur_mini_bufs);
1692
1693	idx = ap->rx_mini_skbprd;
1694	for (i = 0; i < nr_bufs; i++) {
1695		struct sk_buff *skb;
1696		struct rx_desc *rd;
1697		dma_addr_t mapping;
1698
1699		skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE);
1700		if (!skb)
1701			break;
1702
1703		mapping = dma_map_page(&ap->pdev->dev,
1704				       virt_to_page(skb->data),
1705				       offset_in_page(skb->data),
1706				       ACE_MINI_BUFSIZE, DMA_FROM_DEVICE);
1707		ap->skb->rx_mini_skbuff[idx].skb = skb;
1708		dma_unmap_addr_set(&ap->skb->rx_mini_skbuff[idx],
1709				   mapping, mapping);
1710
1711		rd = &ap->rx_mini_ring[idx];
1712		set_aceaddr(&rd->addr, mapping);
1713		rd->size = ACE_MINI_BUFSIZE;
1714		rd->idx = idx;
1715		idx = (idx + 1) % RX_MINI_RING_ENTRIES;
1716	}
1717
1718	if (!i)
1719		goto error_out;
1720
1721	atomic_add(i, &ap->cur_mini_bufs);
1722
1723	ap->rx_mini_skbprd = idx;
1724
1725	writel(idx, &regs->RxMiniPrd);
1726	wmb();
1727
1728 out:
1729	clear_bit(0, &ap->mini_refill_busy);
1730	return;
1731 error_out:
1732	printk(KERN_INFO "Out of memory when allocating "
1733	       "mini receive buffers\n");
1734	goto out;
1735}
1736
1737
1738/*
1739 * Load the jumbo rx ring; this may happen at any time if the MTU
1740 * is changed to a value > 1500.
1741 */
1742static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs)
1743{
1744	struct ace_private *ap = netdev_priv(dev);
1745	struct ace_regs __iomem *regs = ap->regs;
1746	short i, idx;
1747
1748	idx = ap->rx_jumbo_skbprd;
1749
1750	for (i = 0; i < nr_bufs; i++) {
1751		struct sk_buff *skb;
1752		struct rx_desc *rd;
1753		dma_addr_t mapping;
1754
1755		skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE);
1756		if (!skb)
1757			break;
1758
1759		mapping = dma_map_page(&ap->pdev->dev,
1760				       virt_to_page(skb->data),
1761				       offset_in_page(skb->data),
1762				       ACE_JUMBO_BUFSIZE, DMA_FROM_DEVICE);
1763		ap->skb->rx_jumbo_skbuff[idx].skb = skb;
1764		dma_unmap_addr_set(&ap->skb->rx_jumbo_skbuff[idx],
1765				   mapping, mapping);
1766
1767		rd = &ap->rx_jumbo_ring[idx];
1768		set_aceaddr(&rd->addr, mapping);
1769		rd->size = ACE_JUMBO_BUFSIZE;
1770		rd->idx = idx;
1771		idx = (idx + 1) % RX_JUMBO_RING_ENTRIES;
1772	}
1773
1774	if (!i)
1775		goto error_out;
1776
1777	atomic_add(i, &ap->cur_jumbo_bufs);
1778	ap->rx_jumbo_skbprd = idx;
1779
1780	if (ACE_IS_TIGON_I(ap)) {
1781		struct cmd cmd;
1782		cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
1783		cmd.code = 0;
1784		cmd.idx = ap->rx_jumbo_skbprd;
1785		ace_issue_cmd(regs, &cmd);
1786	} else {
1787		writel(idx, &regs->RxJumboPrd);
1788		wmb();
1789	}
1790
1791 out:
1792	clear_bit(0, &ap->jumbo_refill_busy);
1793	return;
1794 error_out:
1795	if (net_ratelimit())
1796		printk(KERN_INFO "Out of memory when allocating "
1797		       "jumbo receive buffers\n");
1798	goto out;
1799}
1800
1801
1802/*
1803 * All events are considered to be slow (RX/TX ints do not generate
1804 * events) and are handled here, outside the main interrupt handler,
1805 * to reduce the size of the handler.
1806 */
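/*
 * The caller (ace_interrupt()) passes in the current event consumer
 * and producer indices; the updated consumer index returned here is
 * what it writes back to regs->EvtCsm.
 */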
1807static u32 ace_handle_event(struct net_device *dev, u32 evtcsm, u32 evtprd)
1808{
1809	struct ace_private *ap;
1810
1811	ap = netdev_priv(dev);
1812
1813	while (evtcsm != evtprd) {
1814		switch (ap->evt_ring[evtcsm].evt) {
1815		case E_FW_RUNNING:
1816			printk(KERN_INFO "%s: Firmware up and running\n",
1817			       ap->name);
1818			ap->fw_running = 1;
1819			wmb();
1820			break;
1821		case E_STATS_UPDATED:
1822			break;
1823		case E_LNK_STATE:
1824		{
1825			u16 code = ap->evt_ring[evtcsm].code;
1826			switch (code) {
1827			case E_C_LINK_UP:
1828			{
1829				u32 state = readl(&ap->regs->GigLnkState);
1830				printk(KERN_WARNING "%s: Optical link UP "
1831				       "(%s Duplex, Flow Control: %s%s)\n",
1832				       ap->name,
1833				       state & LNK_FULL_DUPLEX ? "Full":"Half",
1834				       state & LNK_TX_FLOW_CTL_Y ? "TX " : "",
1835				       state & LNK_RX_FLOW_CTL_Y ? "RX" : "");
1836				break;
1837			}
1838			case E_C_LINK_DOWN:
1839				printk(KERN_WARNING "%s: Optical link DOWN\n",
1840				       ap->name);
1841				break;
1842			case E_C_LINK_10_100:
1843				printk(KERN_WARNING "%s: 10/100BaseT link "
1844				       "UP\n", ap->name);
1845				break;
1846			default:
1847				printk(KERN_ERR "%s: Unknown optical link "
1848				       "state %02x\n", ap->name, code);
1849			}
1850			break;
1851		}
1852		case E_ERROR:
1853			switch(ap->evt_ring[evtcsm].code) {
1854			case E_C_ERR_INVAL_CMD:
1855				printk(KERN_ERR "%s: invalid command error\n",
1856				       ap->name);
1857				break;
1858			case E_C_ERR_UNIMP_CMD:
1859				printk(KERN_ERR "%s: unimplemented command "
1860				       "error\n", ap->name);
1861				break;
1862			case E_C_ERR_BAD_CFG:
1863				printk(KERN_ERR "%s: bad config error\n",
1864				       ap->name);
1865				break;
1866			default:
1867				printk(KERN_ERR "%s: unknown error %02x\n",
1868				       ap->name, ap->evt_ring[evtcsm].code);
1869			}
1870			break;
1871		case E_RESET_JUMBO_RNG:
1872		{
1873			int i;
1874			for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) {
1875				if (ap->skb->rx_jumbo_skbuff[i].skb) {
1876					ap->rx_jumbo_ring[i].size = 0;
1877					set_aceaddr(&ap->rx_jumbo_ring[i].addr, 0);
1878					dev_kfree_skb(ap->skb->rx_jumbo_skbuff[i].skb);
1879					ap->skb->rx_jumbo_skbuff[i].skb = NULL;
1880				}
1881			}
1882
1883			if (ACE_IS_TIGON_I(ap)) {
1884				struct cmd cmd;
1885				cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
1886				cmd.code = 0;
1887				cmd.idx = 0;
1888				ace_issue_cmd(ap->regs, &cmd);
1889			} else {
1890				writel(0, &((ap->regs)->RxJumboPrd));
1891				wmb();
1892			}
1893
1894			ap->jumbo = 0;
1895			ap->rx_jumbo_skbprd = 0;
1896			printk(KERN_INFO "%s: Jumbo ring flushed\n",
1897			       ap->name);
1898			clear_bit(0, &ap->jumbo_refill_busy);
1899			break;
1900		}
1901		default:
1902			printk(KERN_ERR "%s: Unhandled event 0x%02x\n",
1903			       ap->name, ap->evt_ring[evtcsm].evt);
1904		}
1905		evtcsm = (evtcsm + 1) % EVT_RING_ENTRIES;
1906	}
1907
1908	return evtcsm;
1909}
1910
1911
1912static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
1913{
1914	struct ace_private *ap = netdev_priv(dev);
1915	u32 idx;
1916	int mini_count = 0, std_count = 0;
1917
1918	idx = rxretcsm;
1919
1920	prefetchw(&ap->cur_rx_bufs);
1921	prefetchw(&ap->cur_mini_bufs);
1922
1923	while (idx != rxretprd) {
1924		struct ring_info *rip;
1925		struct sk_buff *skb;
1926		struct rx_desc *retdesc;
1927		u32 skbidx;
1928		int bd_flags, desc_type, mapsize;
1929		u16 csum;
1930
1931
1932		/* make sure the rx descriptor isn't read before rxretprd */
1933		if (idx == rxretcsm)
1934			rmb();
1935
1936		retdesc = &ap->rx_return_ring[idx];
1937		skbidx = retdesc->idx;
1938		bd_flags = retdesc->flags;
1939		desc_type = bd_flags & (BD_FLG_JUMBO | BD_FLG_MINI);
1940
1941		switch(desc_type) {
1942			/*
1943			 * Normal frames do not have any flags set
1944			 *
1945			 * Mini and normal frames arrive frequently,
1946			 * so use a local counter to avoid doing
1947			 * atomic operations for each packet arriving.
1948			 */
1949		case 0:
1950			rip = &ap->skb->rx_std_skbuff[skbidx];
1951			mapsize = ACE_STD_BUFSIZE;
1952			std_count++;
1953			break;
1954		case BD_FLG_JUMBO:
1955			rip = &ap->skb->rx_jumbo_skbuff[skbidx];
1956			mapsize = ACE_JUMBO_BUFSIZE;
1957			atomic_dec(&ap->cur_jumbo_bufs);
1958			break;
1959		case BD_FLG_MINI:
1960			rip = &ap->skb->rx_mini_skbuff[skbidx];
1961			mapsize = ACE_MINI_BUFSIZE;
1962			mini_count++;
1963			break;
1964		default:
1965			printk(KERN_INFO "%s: unknown frame type (0x%02x) "
1966			       "returned by NIC\n", dev->name,
1967			       retdesc->flags);
1968			goto error;
1969		}
1970
1971		skb = rip->skb;
1972		rip->skb = NULL;
1973		dma_unmap_page(&ap->pdev->dev, dma_unmap_addr(rip, mapping),
1974			       mapsize, DMA_FROM_DEVICE);
1975		skb_put(skb, retdesc->size);
1976
1977		/*
1978		 * Fly baby, fly!
1979		 */
1980		csum = retdesc->tcp_udp_csum;
1981
1982		skb->protocol = eth_type_trans(skb, dev);
1983
1984		/*
1985		 * Instead of forcing the poor tigon mips cpu to calculate
1986		 * pseudo hdr checksum, we do this ourselves.
1987		 */
1988		if (bd_flags & BD_FLG_TCP_UDP_SUM) {
1989			skb->csum = htons(csum);
1990			skb->ip_summed = CHECKSUM_COMPLETE;
1991		} else {
1992			skb_checksum_none_assert(skb);
1993		}
1994
1995		/* send it up */
1996		if ((bd_flags & BD_FLG_VLAN_TAG))
1997			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), retdesc->vlan);
1998		netif_rx(skb);
1999
2000		dev->stats.rx_packets++;
2001		dev->stats.rx_bytes += retdesc->size;
2002
2003		idx = (idx + 1) % RX_RETURN_RING_ENTRIES;
2004	}
2005
2006	atomic_sub(std_count, &ap->cur_rx_bufs);
2007	if (!ACE_IS_TIGON_I(ap))
2008		atomic_sub(mini_count, &ap->cur_mini_bufs);
2009
2010 out:
2011	/*
2012	 * According to the documentation RxRetCsm is obsolete with
2013	 * the 12.3.x Firmware - my Tigon I NICs seem to disagree!
2014	 */
2015	if (ACE_IS_TIGON_I(ap)) {
2016		writel(idx, &ap->regs->RxRetCsm);
2017	}
2018	ap->cur_rx = idx;
2019
2020	return;
2021 error:
2022	idx = rxretprd;
2023	goto out;
2024}
2025
2026
2027static inline void ace_tx_int(struct net_device *dev,
2028			      u32 txcsm, u32 idx)
2029{
2030	struct ace_private *ap = netdev_priv(dev);
2031
2032	do {
2033		struct sk_buff *skb;
2034		struct tx_ring_info *info;
2035
2036		info = ap->skb->tx_skbuff + idx;
2037		skb = info->skb;
2038
2039		if (dma_unmap_len(info, maplen)) {
2040			dma_unmap_page(&ap->pdev->dev,
2041				       dma_unmap_addr(info, mapping),
2042				       dma_unmap_len(info, maplen),
2043				       DMA_TO_DEVICE);
2044			dma_unmap_len_set(info, maplen, 0);
2045		}
2046
2047		if (skb) {
2048			dev->stats.tx_packets++;
2049			dev->stats.tx_bytes += skb->len;
2050			dev_consume_skb_irq(skb);
2051			info->skb = NULL;
2052		}
2053
2054		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2055	} while (idx != txcsm);
2056
2057	if (netif_queue_stopped(dev))
2058		netif_wake_queue(dev);
2059
2060	wmb();
2061	ap->tx_ret_csm = txcsm;
2062
2063	/* So... tx_ret_csm is advanced _after_ check for device wakeup.
2064	 *
2065	 * We could try to make it before. In this case we would get
2066	 * the following race condition: hard_start_xmit on other cpu
2067	 * enters after we advanced tx_ret_csm and fills space,
2068	 * which we have just freed, so that we make illegal device wakeup.
2069	 * There is no good way to work around this (the check at entry
2070	 * to ace_start_xmit detects this condition and prevents
2071	 * ring corruption, but it is not a good workaround.)
2072	 *
2073	 * When tx_ret_csm is advanced after, we wake up device _only_
2074	 * if we really have some space in ring (though the core doing
2075	 * hard_start_xmit can see full ring for some period and has to
2076	 * synchronize.) Superb.
2077	 * BUT! We get another subtle race condition. hard_start_xmit
2078	 * may think that ring is full between wakeup and advancing
2079	 * tx_ret_csm and will stop device instantly! It is not so bad.
2080	 * We are guaranteed that there is something in ring, so that
2081	 * the next irq will resume transmission. To speedup this we could
2082	 * mark descriptor, which closes ring with BD_FLG_COAL_NOW
2083	 * (see ace_start_xmit).
2084	 *
2085	 * Well, this dilemma exists in all lock-free devices.
2086	 * We, following scheme used in drivers by Donald Becker,
2087	 * select the least dangerous.
2088	 *							--ANK
2089	 */
2090}
2091
2092
2093static irqreturn_t ace_interrupt(int irq, void *dev_id)
2094{
2095	struct net_device *dev = (struct net_device *)dev_id;
2096	struct ace_private *ap = netdev_priv(dev);
2097	struct ace_regs __iomem *regs = ap->regs;
2098	u32 idx;
2099	u32 txcsm, rxretcsm, rxretprd;
2100	u32 evtcsm, evtprd;
2101
2102	/*
2103	 * In case of PCI shared interrupts or spurious interrupts,
2104	 * we want to make sure it is actually our interrupt before
2105	 * spending any time in here.
2106	 */
2107	if (!(readl(&regs->HostCtrl) & IN_INT))
2108		return IRQ_NONE;
2109
2110	/*
2111	 * ACK intr now. Otherwise we will lose updates to rx_ret_prd,
2112	 * which happened _after_ rxretprd = *ap->rx_ret_prd; but before
2113	 * writel(0, &regs->Mb0Lo).
2114	 *
2115	 * The "IRQ avoidance" recommended in the docs applies to IRQs served
2116	 * by threads, and it is wrong even for that case.
2117	 */
2118	writel(0, &regs->Mb0Lo);
2119	readl(&regs->Mb0Lo);
2120
2121	/*
2122	 * There is no conflict between transmit handling in
2123	 * start_xmit and receive processing, thus there is no reason
2124	 * to take a spin lock for RX handling. Wait until we start
2125	 * working on the other stuff - hey we don't need a spin lock
2126	 * anymore.
2127	 */
2128	rxretprd = *ap->rx_ret_prd;
2129	rxretcsm = ap->cur_rx;
2130
2131	if (rxretprd != rxretcsm)
2132		ace_rx_int(dev, rxretprd, rxretcsm);
2133
2134	txcsm = *ap->tx_csm;
2135	idx = ap->tx_ret_csm;
2136
2137	if (txcsm != idx) {
2138		/*
2139		 * If each skb takes only one descriptor this check degenerates
2140		 * to identity, because new space has just been opened.
2141		 * But if skbs are fragmented we must check that this index
2142		 * update releases enough of space, otherwise we just
2143		 * wait for device to make more work.
2144		 */
2145		if (!tx_ring_full(ap, txcsm, ap->tx_prd))
2146			ace_tx_int(dev, txcsm, idx);
2147	}
2148
2149	evtcsm = readl(&regs->EvtCsm);
2150	evtprd = *ap->evt_prd;
2151
2152	if (evtcsm != evtprd) {
2153		evtcsm = ace_handle_event(dev, evtcsm, evtprd);
2154		writel(evtcsm, &regs->EvtCsm);
2155	}
2156
2157	/*
2158	 * This has to go last in the interrupt handler and run with
2159	 * the spin lock released ... what lock?
2160	 */
2161	if (netif_running(dev)) {
2162		int cur_size;
2163		int run_tasklet = 0;
2164
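		/*
		 * Refill strategy: below the LOW thresholds the refill is
		 * deferred to the tasklet; below the PANIC thresholds the
		 * rings are refilled right here, from the interrupt
		 * handler, so the NIC is not starved of buffers.
		 */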
2165		cur_size = atomic_read(&ap->cur_rx_bufs);
2166		if (cur_size < RX_LOW_STD_THRES) {
2167			if ((cur_size < RX_PANIC_STD_THRES) &&
2168			    !test_and_set_bit(0, &ap->std_refill_busy)) {
2169#ifdef DEBUG
2170				printk("low on std buffers %i\n", cur_size);
2171#endif
2172				ace_load_std_rx_ring(dev,
2173						     RX_RING_SIZE - cur_size);
2174			} else
2175				run_tasklet = 1;
2176		}
2177
2178		if (!ACE_IS_TIGON_I(ap)) {
2179			cur_size = atomic_read(&ap->cur_mini_bufs);
2180			if (cur_size < RX_LOW_MINI_THRES) {
2181				if ((cur_size < RX_PANIC_MINI_THRES) &&
2182				    !test_and_set_bit(0,
2183						      &ap->mini_refill_busy)) {
2184#ifdef DEBUG
2185					printk("low on mini buffers %i\n",
2186					       cur_size);
2187#endif
2188					ace_load_mini_rx_ring(dev,
2189							      RX_MINI_SIZE - cur_size);
2190				} else
2191					run_tasklet = 1;
2192			}
2193		}
2194
2195		if (ap->jumbo) {
2196			cur_size = atomic_read(&ap->cur_jumbo_bufs);
2197			if (cur_size < RX_LOW_JUMBO_THRES) {
2198				if ((cur_size < RX_PANIC_JUMBO_THRES) &&
2199				    !test_and_set_bit(0,
2200						      &ap->jumbo_refill_busy)){
2201#ifdef DEBUG
2202					printk("low on jumbo buffers %i\n",
2203					       cur_size);
2204#endif
2205					ace_load_jumbo_rx_ring(dev,
2206							       RX_JUMBO_SIZE - cur_size);
2207				} else
2208					run_tasklet = 1;
2209			}
2210		}
2211		if (run_tasklet && !ap->tasklet_pending) {
2212			ap->tasklet_pending = 1;
2213			tasklet_schedule(&ap->ace_tasklet);
2214		}
2215	}
2216
2217	return IRQ_HANDLED;
2218}
2219
2220static int ace_open(struct net_device *dev)
2221{
2222	struct ace_private *ap = netdev_priv(dev);
2223	struct ace_regs __iomem *regs = ap->regs;
2224	struct cmd cmd;
2225
2226	if (!(ap->fw_running)) {
2227		printk(KERN_WARNING "%s: Firmware not running!\n", dev->name);
2228		return -EBUSY;
2229	}
2230
2231	writel(dev->mtu + ETH_HLEN + 4, &regs->IfMtu);
2232
2233	cmd.evt = C_CLEAR_STATS;
2234	cmd.code = 0;
2235	cmd.idx = 0;
2236	ace_issue_cmd(regs, &cmd);
2237
2238	cmd.evt = C_HOST_STATE;
2239	cmd.code = C_C_STACK_UP;
2240	cmd.idx = 0;
2241	ace_issue_cmd(regs, &cmd);
2242
2243	if (ap->jumbo &&
2244	    !test_and_set_bit(0, &ap->jumbo_refill_busy))
2245		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
2246
2247	if (dev->flags & IFF_PROMISC) {
2248		cmd.evt = C_SET_PROMISC_MODE;
2249		cmd.code = C_C_PROMISC_ENABLE;
2250		cmd.idx = 0;
2251		ace_issue_cmd(regs, &cmd);
2252
2253		ap->promisc = 1;
2254	} else
2255		ap->promisc = 0;
2256	ap->mcast_all = 0;
2257
2258#if 0
2259	cmd.evt = C_LNK_NEGOTIATION;
2260	cmd.code = 0;
2261	cmd.idx = 0;
2262	ace_issue_cmd(regs, &cmd);
2263#endif
2264
2265	netif_start_queue(dev);
2266
2267	/*
2268	 * Setup the bottom half rx ring refill handler
2269	 */
2270	tasklet_setup(&ap->ace_tasklet, ace_tasklet);
2271	return 0;
2272}
2273
2274
2275static int ace_close(struct net_device *dev)
2276{
2277	struct ace_private *ap = netdev_priv(dev);
2278	struct ace_regs __iomem *regs = ap->regs;
2279	struct cmd cmd;
2280	unsigned long flags;
2281	short i;
2282
2283	/*
2284	 * Without (or before) releasing the irq and stopping the hardware,
2285	 * this is absolute nonsense, by the way. It will be reset instantly
2286	 * by the first irq.
2287	 */
2288	netif_stop_queue(dev);
2289
2290
2291	if (ap->promisc) {
2292		cmd.evt = C_SET_PROMISC_MODE;
2293		cmd.code = C_C_PROMISC_DISABLE;
2294		cmd.idx = 0;
2295		ace_issue_cmd(regs, &cmd);
2296		ap->promisc = 0;
2297	}
2298
2299	cmd.evt = C_HOST_STATE;
2300	cmd.code = C_C_STACK_DOWN;
2301	cmd.idx = 0;
2302	ace_issue_cmd(regs, &cmd);
2303
2304	tasklet_kill(&ap->ace_tasklet);
2305
2306	/*
2307	 * Make sure one CPU is not processing packets while
2308	 * buffers are being released by another.
2309	 */
2310
2311	local_irq_save(flags);
2312	ace_mask_irq(dev);
2313
2314	for (i = 0; i < ACE_TX_RING_ENTRIES(ap); i++) {
2315		struct sk_buff *skb;
2316		struct tx_ring_info *info;
2317
2318		info = ap->skb->tx_skbuff + i;
2319		skb = info->skb;
2320
2321		if (dma_unmap_len(info, maplen)) {
2322			if (ACE_IS_TIGON_I(ap)) {
2323				/* NB: TIGON_1 is special, tx_ring is in io space */
2324				struct tx_desc __iomem *tx;
2325				tx = (__force struct tx_desc __iomem *) &ap->tx_ring[i];
2326				writel(0, &tx->addr.addrhi);
2327				writel(0, &tx->addr.addrlo);
2328				writel(0, &tx->flagsize);
2329			} else
2330				memset(ap->tx_ring + i, 0,
2331				       sizeof(struct tx_desc));
2332			dma_unmap_page(&ap->pdev->dev,
2333				       dma_unmap_addr(info, mapping),
2334				       dma_unmap_len(info, maplen),
2335				       DMA_TO_DEVICE);
2336			dma_unmap_len_set(info, maplen, 0);
2337		}
2338		if (skb) {
2339			dev_kfree_skb(skb);
2340			info->skb = NULL;
2341		}
2342	}
2343
2344	if (ap->jumbo) {
2345		cmd.evt = C_RESET_JUMBO_RNG;
2346		cmd.code = 0;
2347		cmd.idx = 0;
2348		ace_issue_cmd(regs, &cmd);
2349	}
2350
2351	ace_unmask_irq(dev);
2352	local_irq_restore(flags);
2353
2354	return 0;
2355}
2356
2357
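/*
 * Map the linear part of an skb for TX and record the unmap info.
 * @tail is the skb pointer stored in the ring slot: callers pass the
 * skb itself only for the descriptor that completes the packet (NULL
 * otherwise), so ace_tx_int() frees the skb exactly once.
 */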
2358static inline dma_addr_t
2359ace_map_tx_skb(struct ace_private *ap, struct sk_buff *skb,
2360	       struct sk_buff *tail, u32 idx)
2361{
2362	dma_addr_t mapping;
2363	struct tx_ring_info *info;
2364
2365	mapping = dma_map_page(&ap->pdev->dev, virt_to_page(skb->data),
2366			       offset_in_page(skb->data), skb->len,
2367			       DMA_TO_DEVICE);
2368
2369	info = ap->skb->tx_skbuff + idx;
2370	info->skb = tail;
2371	dma_unmap_addr_set(info, mapping, mapping);
2372	dma_unmap_len_set(info, maplen, skb->len);
2373	return mapping;
2374}
2375
2376
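/*
 * Write one TX descriptor. On Tigon I the TX ring lives in NIC (IO)
 * space, so the descriptor must be filled in with writel(); on
 * Tigon II the ring is in host memory and can be written directly.
 */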
2377static inline void
2378ace_load_tx_bd(struct ace_private *ap, struct tx_desc *desc, u64 addr,
2379	       u32 flagsize, u32 vlan_tag)
2380{
2381#if !USE_TX_COAL_NOW
2382	flagsize &= ~BD_FLG_COAL_NOW;
2383#endif
2384
2385	if (ACE_IS_TIGON_I(ap)) {
2386		struct tx_desc __iomem *io = (__force struct tx_desc __iomem *) desc;
2387		writel(addr >> 32, &io->addr.addrhi);
2388		writel(addr & 0xffffffff, &io->addr.addrlo);
2389		writel(flagsize, &io->flagsize);
2390		writel(vlan_tag, &io->vlanres);
2391	} else {
2392		desc->addr.addrhi = addr >> 32;
2393		desc->addr.addrlo = addr;
2394		desc->flagsize = flagsize;
2395		desc->vlanres = vlan_tag;
2396	}
2397}
2398
2399
2400static netdev_tx_t ace_start_xmit(struct sk_buff *skb,
2401				  struct net_device *dev)
2402{
2403	struct ace_private *ap = netdev_priv(dev);
2404	struct ace_regs __iomem *regs = ap->regs;
2405	struct tx_desc *desc;
2406	u32 idx, flagsize;
2407	unsigned long maxjiff = jiffies + 3*HZ;
2408
2409restart:
2410	idx = ap->tx_prd;
2411
2412	if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2413		goto overflow;
2414
2415	if (!skb_shinfo(skb)->nr_frags)	{
2416		dma_addr_t mapping;
2417		u32 vlan_tag = 0;
2418
2419		mapping = ace_map_tx_skb(ap, skb, skb, idx);
2420		flagsize = (skb->len << 16) | (BD_FLG_END);
2421		if (skb->ip_summed == CHECKSUM_PARTIAL)
2422			flagsize |= BD_FLG_TCP_UDP_SUM;
2423		if (skb_vlan_tag_present(skb)) {
2424			flagsize |= BD_FLG_VLAN_TAG;
2425			vlan_tag = skb_vlan_tag_get(skb);
2426		}
2427		desc = ap->tx_ring + idx;
2428		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2429
2430		/* Look at ace_tx_int for explanations. */
2431		if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2432			flagsize |= BD_FLG_COAL_NOW;
2433
2434		ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag);
2435	} else {
2436		dma_addr_t mapping;
2437		u32 vlan_tag = 0;
2438		int i;
2439
2440		mapping = ace_map_tx_skb(ap, skb, NULL, idx);
2441		flagsize = (skb_headlen(skb) << 16);
2442		if (skb->ip_summed == CHECKSUM_PARTIAL)
2443			flagsize |= BD_FLG_TCP_UDP_SUM;
2444		if (skb_vlan_tag_present(skb)) {
2445			flagsize |= BD_FLG_VLAN_TAG;
2446			vlan_tag = skb_vlan_tag_get(skb);
2447		}
2448
2449		ace_load_tx_bd(ap, ap->tx_ring + idx, mapping, flagsize, vlan_tag);
2450
2451		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2452
2453		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2454			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2455			struct tx_ring_info *info;
2456
2457			info = ap->skb->tx_skbuff + idx;
2458			desc = ap->tx_ring + idx;
2459
2460			mapping = skb_frag_dma_map(&ap->pdev->dev, frag, 0,
2461						   skb_frag_size(frag),
2462						   DMA_TO_DEVICE);
2463
2464			flagsize = skb_frag_size(frag) << 16;
2465			if (skb->ip_summed == CHECKSUM_PARTIAL)
2466				flagsize |= BD_FLG_TCP_UDP_SUM;
2467			idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2468
2469			if (i == skb_shinfo(skb)->nr_frags - 1) {
2470				flagsize |= BD_FLG_END;
2471				if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2472					flagsize |= BD_FLG_COAL_NOW;
2473
2474				/*
2475				 * Only the last fragment frees
2476				 * the skb!
2477				 */
2478				info->skb = skb;
2479			} else {
2480				info->skb = NULL;
2481			}
2482			dma_unmap_addr_set(info, mapping, mapping);
2483			dma_unmap_len_set(info, maplen, skb_frag_size(frag));
2484			ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag);
2485		}
2486	}
2487
2488	wmb();
2489	ap->tx_prd = idx;
2490	ace_set_txprd(regs, ap, idx);
2491
2492	if (flagsize & BD_FLG_COAL_NOW) {
2493		netif_stop_queue(dev);
2494
2495		/*
2496		 * A TX-descriptor producer (an IRQ) might have gotten
2497		 * between, making the ring free again. Since xmit is
2498		 * serialized, this is the only situation we have to
2499		 * re-test.
2500		 */
2501		if (!tx_ring_full(ap, ap->tx_ret_csm, idx))
2502			netif_wake_queue(dev);
2503	}
2504
2505	return NETDEV_TX_OK;
2506
2507overflow:
2508	/*
2509	 * This race condition is unavoidable with lock-free drivers.
2510	 * We wake up the queue _before_ tx_prd is advanced, so that we can
2511	 * enter hard_start_xmit too early, while tx ring still looks closed.
2512	 * This happens ~1-4 times per 100000 packets, so it is acceptable
2513	 * to loop here, syncing with the other CPU. Probably we need an
2514	 * additional wmb() in ace_tx_int as well.
2515	 *
2516	 * Note that this race is relieved by reserving one more entry
2517	 * in tx ring than it is necessary (see original non-SG driver).
2518	 * However, with SG we need to reserve 2*MAX_SKB_FRAGS+1, which
2519	 * is already overkill.
2520	 *
2521	 * The alternative is to return NETDEV_TX_BUSY without throttling the
2522	 * queue; in that case the loop just becomes longer with no added benefit.
2523	 */
2524	if (time_before(jiffies, maxjiff)) {
2525		barrier();
2526		cpu_relax();
2527		goto restart;
2528	}
2529
2530	/* The ring is stuck full. */
2531	printk(KERN_WARNING "%s: Transmit ring stuck full\n", dev->name);
2532	return NETDEV_TX_BUSY;
2533}
2534
2535
2536static int ace_change_mtu(struct net_device *dev, int new_mtu)
2537{
2538	struct ace_private *ap = netdev_priv(dev);
2539	struct ace_regs __iomem *regs = ap->regs;
2540
2541	writel(new_mtu + ETH_HLEN + 4, &regs->IfMtu);
2542	dev->mtu = new_mtu;
2543
2544	if (new_mtu > ACE_STD_MTU) {
2545		if (!(ap->jumbo)) {
2546			printk(KERN_INFO "%s: Enabling Jumbo frame "
2547			       "support\n", dev->name);
2548			ap->jumbo = 1;
2549			if (!test_and_set_bit(0, &ap->jumbo_refill_busy))
2550				ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
2551			ace_set_rxtx_parms(dev, 1);
2552		}
2553	} else {
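		/*
		 * Shrinking the MTU below the jumbo threshold: wait for any
		 * refill of the jumbo ring already in flight, quiesce the
		 * irq, and then ask the firmware to flush the jumbo ring.
		 * The actual cleanup happens when the E_RESET_JUMBO_RNG
		 * event comes back (see ace_handle_event()).
		 */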
2554		while (test_and_set_bit(0, &ap->jumbo_refill_busy));
2555		ace_sync_irq(dev->irq);
2556		ace_set_rxtx_parms(dev, 0);
2557		if (ap->jumbo) {
2558			struct cmd cmd;
2559
2560			cmd.evt = C_RESET_JUMBO_RNG;
2561			cmd.code = 0;
2562			cmd.idx = 0;
2563			ace_issue_cmd(regs, &cmd);
2564		}
2565	}
2566
2567	return 0;
2568}
2569
2570static int ace_get_link_ksettings(struct net_device *dev,
2571				  struct ethtool_link_ksettings *cmd)
2572{
2573	struct ace_private *ap = netdev_priv(dev);
2574	struct ace_regs __iomem *regs = ap->regs;
2575	u32 link;
2576	u32 supported;
2577
2578	memset(cmd, 0, sizeof(struct ethtool_link_ksettings));
2579
2580	supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
2581		     SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
2582		     SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
2583		     SUPPORTED_Autoneg | SUPPORTED_FIBRE);
2584
2585	cmd->base.port = PORT_FIBRE;
2586
2587	link = readl(&regs->GigLnkState);
2588	if (link & LNK_1000MB) {
2589		cmd->base.speed = SPEED_1000;
2590	} else {
2591		link = readl(&regs->FastLnkState);
2592		if (link & LNK_100MB)
2593			cmd->base.speed = SPEED_100;
2594		else if (link & LNK_10MB)
2595			cmd->base.speed = SPEED_10;
2596		else
2597			cmd->base.speed = 0;
2598	}
2599	if (link & LNK_FULL_DUPLEX)
2600		cmd->base.duplex = DUPLEX_FULL;
2601	else
2602		cmd->base.duplex = DUPLEX_HALF;
2603
2604	if (link & LNK_NEGOTIATE)
2605		cmd->base.autoneg = AUTONEG_ENABLE;
2606	else
2607		cmd->base.autoneg = AUTONEG_DISABLE;
2608
2609#if 0
2610	/*
2611	 * Current struct ethtool_cmd is insufficient
2612	 */
2613	ecmd->trace = readl(&regs->TuneTrace);
2614
2615	ecmd->txcoal = readl(&regs->TuneTxCoalTicks);
2616	ecmd->rxcoal = readl(&regs->TuneRxCoalTicks);
2617#endif
2618
2619	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
2620						supported);
2621
2622	return 0;
2623}
2624
2625static int ace_set_link_ksettings(struct net_device *dev,
2626				  const struct ethtool_link_ksettings *cmd)
2627{
2628	struct ace_private *ap = netdev_priv(dev);
2629	struct ace_regs __iomem *regs = ap->regs;
2630	u32 link, speed;
2631
2632	link = readl(&regs->GigLnkState);
2633	if (link & LNK_1000MB)
2634		speed = SPEED_1000;
2635	else {
2636		link = readl(&regs->FastLnkState);
2637		if (link & LNK_100MB)
2638			speed = SPEED_100;
2639		else if (link & LNK_10MB)
2640			speed = SPEED_10;
2641		else
2642			speed = SPEED_100;
2643	}
2644
2645	link = LNK_ENABLE | LNK_1000MB | LNK_100MB | LNK_10MB |
2646		LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL;
2647	if (!ACE_IS_TIGON_I(ap))
2648		link |= LNK_TX_FLOW_CTL_Y;
2649	if (cmd->base.autoneg == AUTONEG_ENABLE)
2650		link |= LNK_NEGOTIATE;
2651	if (cmd->base.speed != speed) {
2652		link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB);
2653		switch (cmd->base.speed) {
2654		case SPEED_1000:
2655			link |= LNK_1000MB;
2656			break;
2657		case SPEED_100:
2658			link |= LNK_100MB;
2659			break;
2660		case SPEED_10:
2661			link |= LNK_10MB;
2662			break;
2663		}
2664	}
2665
2666	if (cmd->base.duplex == DUPLEX_FULL)
2667		link |= LNK_FULL_DUPLEX;
2668
2669	if (link != ap->link) {
2670		struct cmd cmd;
2671		printk(KERN_INFO "%s: Renegotiating link state\n",
2672		       dev->name);
2673
2674		ap->link = link;
2675		writel(link, &regs->TuneLink);
2676		if (!ACE_IS_TIGON_I(ap))
2677			writel(link, &regs->TuneFastLink);
2678		wmb();
2679
2680		cmd.evt = C_LNK_NEGOTIATION;
2681		cmd.code = 0;
2682		cmd.idx = 0;
2683		ace_issue_cmd(regs, &cmd);
2684	}
2685	return 0;
2686}
2687
2688static void ace_get_drvinfo(struct net_device *dev,
2689			    struct ethtool_drvinfo *info)
2690{
2691	struct ace_private *ap = netdev_priv(dev);
2692
2693	strscpy(info->driver, "acenic", sizeof(info->driver));
2694	snprintf(info->fw_version, sizeof(info->fw_version), "%i.%i.%i",
2695		 ap->firmware_major, ap->firmware_minor, ap->firmware_fix);
2696
2697	if (ap->pdev)
2698		strscpy(info->bus_info, pci_name(ap->pdev),
2699			sizeof(info->bus_info));
2700
2701}
2702
2703/*
2704 * Set the hardware MAC address.
2705 */
2706static int ace_set_mac_addr(struct net_device *dev, void *p)
2707{
2708	struct ace_private *ap = netdev_priv(dev);
2709	struct ace_regs __iomem *regs = ap->regs;
2710	struct sockaddr *addr=p;
2711	const u8 *da;
2712	struct cmd cmd;
2713
2714	if(netif_running(dev))
2715		return -EBUSY;
2716
2717	eth_hw_addr_set(dev, addr->sa_data);
2718
2719	da = (const u8 *)dev->dev_addr;
2720
2721	writel(da[0] << 8 | da[1], &regs->MacAddrHi);
2722	writel((da[2] << 24) | (da[3] << 16) | (da[4] << 8) | da[5],
2723	       &regs->MacAddrLo);
2724
2725	cmd.evt = C_SET_MAC_ADDR;
2726	cmd.code = 0;
2727	cmd.idx = 0;
2728	ace_issue_cmd(regs, &cmd);
2729
2730	return 0;
2731}
2732
2733
2734static void ace_set_multicast_list(struct net_device *dev)
2735{
2736	struct ace_private *ap = netdev_priv(dev);
2737	struct ace_regs __iomem *regs = ap->regs;
2738	struct cmd cmd;
2739
2740	if ((dev->flags & IFF_ALLMULTI) && !(ap->mcast_all)) {
2741		cmd.evt = C_SET_MULTICAST_MODE;
2742		cmd.code = C_C_MCAST_ENABLE;
2743		cmd.idx = 0;
2744		ace_issue_cmd(regs, &cmd);
2745		ap->mcast_all = 1;
2746	} else if (ap->mcast_all) {
2747		cmd.evt = C_SET_MULTICAST_MODE;
2748		cmd.code = C_C_MCAST_DISABLE;
2749		cmd.idx = 0;
2750		ace_issue_cmd(regs, &cmd);
2751		ap->mcast_all = 0;
2752	}
2753
2754	if ((dev->flags & IFF_PROMISC) && !(ap->promisc)) {
2755		cmd.evt = C_SET_PROMISC_MODE;
2756		cmd.code = C_C_PROMISC_ENABLE;
2757		cmd.idx = 0;
2758		ace_issue_cmd(regs, &cmd);
2759		ap->promisc = 1;
2760	} else if (!(dev->flags & IFF_PROMISC) && (ap->promisc)) {
2761		cmd.evt = C_SET_PROMISC_MODE;
2762		cmd.code = C_C_PROMISC_DISABLE;
2763		cmd.idx = 0;
2764		ace_issue_cmd(regs, &cmd);
2765		ap->promisc = 0;
2766	}
2767
2768	/*
2769	 * For the time being multicast relies on the upper layers
2770	 * filtering it properly. The Firmware does not allow one to
2771	 * set the entire multicast list at a time and keeping track of
2772	 * it here is going to be messy.
2773	 */
2774	if (!netdev_mc_empty(dev) && !ap->mcast_all) {
2775		cmd.evt = C_SET_MULTICAST_MODE;
2776		cmd.code = C_C_MCAST_ENABLE;
2777		cmd.idx = 0;
2778		ace_issue_cmd(regs, &cmd);
2779	} else if (!ap->mcast_all) {
2780		cmd.evt = C_SET_MULTICAST_MODE;
2781		cmd.code = C_C_MCAST_DISABLE;
2782		cmd.idx = 0;
2783		ace_issue_cmd(regs, &cmd);
2784	}
2785}
2786
2787
2788static struct net_device_stats *ace_get_stats(struct net_device *dev)
2789{
2790	struct ace_private *ap = netdev_priv(dev);
2791	struct ace_mac_stats __iomem *mac_stats =
2792		(struct ace_mac_stats __iomem *)ap->regs->Stats;
2793
2794	dev->stats.rx_missed_errors = readl(&mac_stats->drop_space);
2795	dev->stats.multicast = readl(&mac_stats->kept_mc);
2796	dev->stats.collisions = readl(&mac_stats->coll);
2797
2798	return &dev->stats;
2799}
2800
2801
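/*
 * Helpers for accessing the NIC's SRAM through the shared-memory
 * window: regs->WinBase selects which ACE_WINDOW_SIZE-aligned chunk
 * of SRAM appears in regs->Window, and tsize is clamped so that each
 * pass never runs past the end of the currently mapped window.
 */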
2802static void ace_copy(struct ace_regs __iomem *regs, const __be32 *src,
2803		     u32 dest, int size)
2804{
2805	void __iomem *tdest;
2806	short tsize, i;
2807
2808	if (size <= 0)
2809		return;
2810
2811	while (size > 0) {
2812		tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
2813			    min_t(u32, size, ACE_WINDOW_SIZE));
2814		tdest = (void __iomem *) &regs->Window +
2815			(dest & (ACE_WINDOW_SIZE - 1));
2816		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
2817		for (i = 0; i < (tsize / 4); i++) {
2818			/* Firmware is big-endian */
2819			writel(be32_to_cpup(src), tdest);
2820			src++;
2821			tdest += 4;
2822			dest += 4;
2823			size -= 4;
2824		}
2825	}
2826}
2827
2828
2829static void ace_clear(struct ace_regs __iomem *regs, u32 dest, int size)
2830{
2831	void __iomem *tdest;
2832	short tsize = 0, i;
2833
2834	if (size <= 0)
2835		return;
2836
2837	while (size > 0) {
2838		tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
2839				min_t(u32, size, ACE_WINDOW_SIZE));
2840		tdest = (void __iomem *) &regs->Window +
2841			(dest & (ACE_WINDOW_SIZE - 1));
2842		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
2843
2844		for (i = 0; i < (tsize / 4); i++) {
2845			writel(0, tdest + i*4);
2846		}
2847
2848		dest += tsize;
2849		size -= tsize;
2850	}
2851}
2852
2853
2854/*
2855 * Download the firmware into the SRAM on the NIC
2856 *
2857 * This operation requires the NIC to be halted and is performed with
2858 * interrupts disabled and with the spinlock held.
2859 */
2860static int ace_load_firmware(struct net_device *dev)
2861{
2862	const struct firmware *fw;
2863	const char *fw_name = "acenic/tg2.bin";
2864	struct ace_private *ap = netdev_priv(dev);
2865	struct ace_regs __iomem *regs = ap->regs;
2866	const __be32 *fw_data;
2867	u32 load_addr;
2868	int ret;
2869
2870	if (!(readl(&regs->CpuCtrl) & CPU_HALTED)) {
2871		printk(KERN_ERR "%s: trying to download firmware while the "
2872		       "CPU is running!\n", ap->name);
2873		return -EFAULT;
2874	}
2875
2876	if (ACE_IS_TIGON_I(ap))
2877		fw_name = "acenic/tg1.bin";
2878
2879	ret = request_firmware(&fw, fw_name, &ap->pdev->dev);
2880	if (ret) {
2881		printk(KERN_ERR "%s: Failed to load firmware \"%s\"\n",
2882		       ap->name, fw_name);
2883		return ret;
2884	}
2885
2886	fw_data = (void *)fw->data;
2887
2888	/* Firmware blob starts with version numbers, followed by
2889	   load and start address. Remainder is the blob to be loaded
2890	   contiguously from load address. We don't bother to represent
2891	   the BSS/SBSS sections any more, since we were clearing the
2892	   whole thing anyway. */
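	/*
	 * Roughly, the blob layout as consumed below:
	 *   bytes 0-2           : major, minor, fix version
	 *   word at offset 0x04 : firmware start (entry) address
	 *   word at offset 0x08 : load address
	 *   offset 0x0c onwards : image, fw->size - 12 bytes
	 * (the words are big-endian, hence the be32_to_cpu() calls)
	 */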
2893	ap->firmware_major = fw->data[0];
2894	ap->firmware_minor = fw->data[1];
2895	ap->firmware_fix = fw->data[2];
2896
2897	ap->firmware_start = be32_to_cpu(fw_data[1]);
2898	if (ap->firmware_start < 0x4000 || ap->firmware_start >= 0x80000) {
2899		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
2900		       ap->name, ap->firmware_start, fw_name);
2901		ret = -EINVAL;
2902		goto out;
2903	}
2904
2905	load_addr = be32_to_cpu(fw_data[2]);
2906	if (load_addr < 0x4000 || load_addr >= 0x80000) {
2907		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
2908		       ap->name, load_addr, fw_name);
2909		ret = -EINVAL;
2910		goto out;
2911	}
2912
2913	/*
2914	 * Do not try to clear more than 512KiB or we end up seeing
2915	 * funny things on NICs with only 512KiB SRAM
2916	 */
2917	ace_clear(regs, 0x2000, 0x80000-0x2000);
2918	ace_copy(regs, &fw_data[3], load_addr, fw->size-12);
2919 out:
2920	release_firmware(fw);
2921	return ret;
2922}
2923
2924
2925/*
2926 * The eeprom on the AceNIC is an Atmel i2c EEPROM.
2927 *
2928 * Accessing the EEPROM is `interesting' to say the least - don't read
2929 * this code right after dinner.
2930 *
2931 * This is all about black magic and bit-banging the device .... I
2932 * wonder in what hospital they have put the guy who designed the i2c
2933 * specs.
2934 *
2935 * Oh yes, this is only the beginning!
2936 *
2937 * Thanks to Stevarino Webinski for helping track down the bugs in the
2938 * i2c readout code by beta testing all my hacks.
2939 */
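/*
 * The routines below bit-bang i2c over two GPIO-style bits in the
 * LocalCtrl register: eeprom_start() generates a START condition,
 * eeprom_prep() clocks out one byte MSB first, eeprom_check_ack()
 * samples the ACK bit from the EEPROM, and eeprom_stop() generates a
 * STOP condition. Each writel() is followed by a readl() to flush
 * PCI write posting before the udelay().
 */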
2940static void eeprom_start(struct ace_regs __iomem *regs)
2941{
2942	u32 local;
2943
2944	readl(&regs->LocalCtrl);
2945	udelay(ACE_SHORT_DELAY);
2946	local = readl(&regs->LocalCtrl);
2947	local |= EEPROM_DATA_OUT | EEPROM_WRITE_ENABLE;
2948	writel(local, &regs->LocalCtrl);
2949	readl(&regs->LocalCtrl);
2950	mb();
2951	udelay(ACE_SHORT_DELAY);
2952	local |= EEPROM_CLK_OUT;
2953	writel(local, &regs->LocalCtrl);
2954	readl(&regs->LocalCtrl);
2955	mb();
2956	udelay(ACE_SHORT_DELAY);
2957	local &= ~EEPROM_DATA_OUT;
2958	writel(local, &regs->LocalCtrl);
2959	readl(&regs->LocalCtrl);
2960	mb();
2961	udelay(ACE_SHORT_DELAY);
2962	local &= ~EEPROM_CLK_OUT;
2963	writel(local, &regs->LocalCtrl);
2964	readl(&regs->LocalCtrl);
2965	mb();
2966}
2967
2968
2969static void eeprom_prep(struct ace_regs __iomem *regs, u8 magic)
2970{
2971	short i;
2972	u32 local;
2973
2974	udelay(ACE_SHORT_DELAY);
2975	local = readl(&regs->LocalCtrl);
2976	local &= ~EEPROM_DATA_OUT;
2977	local |= EEPROM_WRITE_ENABLE;
2978	writel(local, &regs->LocalCtrl);
2979	readl(&regs->LocalCtrl);
2980	mb();
2981
2982	for (i = 0; i < 8; i++, magic <<= 1) {
2983		udelay(ACE_SHORT_DELAY);
2984		if (magic & 0x80)
2985			local |= EEPROM_DATA_OUT;
2986		else
2987			local &= ~EEPROM_DATA_OUT;
2988		writel(local, &regs->LocalCtrl);
2989		readl(&regs->LocalCtrl);
2990		mb();
2991
2992		udelay(ACE_SHORT_DELAY);
2993		local |= EEPROM_CLK_OUT;
2994		writel(local, &regs->LocalCtrl);
2995		readl(&regs->LocalCtrl);
2996		mb();
2997		udelay(ACE_SHORT_DELAY);
2998		local &= ~(EEPROM_CLK_OUT | EEPROM_DATA_OUT);
2999		writel(local, &regs->LocalCtrl);
3000		readl(&regs->LocalCtrl);
3001		mb();
3002	}
3003}
3004
3005
3006static int eeprom_check_ack(struct ace_regs __iomem *regs)
3007{
3008	int state;
3009	u32 local;
3010
3011	local = readl(&regs->LocalCtrl);
3012	local &= ~EEPROM_WRITE_ENABLE;
3013	writel(local, &regs->LocalCtrl);
3014	readl(&regs->LocalCtrl);
3015	mb();
3016	udelay(ACE_LONG_DELAY);
3017	local |= EEPROM_CLK_OUT;
3018	writel(local, &regs->LocalCtrl);
3019	readl(&regs->LocalCtrl);
3020	mb();
3021	udelay(ACE_SHORT_DELAY);
3022	/* sample data in middle of high clk */
3023	state = (readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0;
3024	udelay(ACE_SHORT_DELAY);
3025	mb();
3026	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
3027	readl(&regs->LocalCtrl);
3028	mb();
3029
3030	return state;
3031}
3032
3033
3034static void eeprom_stop(struct ace_regs __iomem *regs)
3035{
3036	u32 local;
3037
3038	udelay(ACE_SHORT_DELAY);
3039	local = readl(&regs->LocalCtrl);
3040	local |= EEPROM_WRITE_ENABLE;
3041	writel(local, &regs->LocalCtrl);
3042	readl(&regs->LocalCtrl);
3043	mb();
3044	udelay(ACE_SHORT_DELAY);
3045	local &= ~EEPROM_DATA_OUT;
3046	writel(local, &regs->LocalCtrl);
3047	readl(&regs->LocalCtrl);
3048	mb();
3049	udelay(ACE_SHORT_DELAY);
3050	local |= EEPROM_CLK_OUT;
3051	writel(local, &regs->LocalCtrl);
3052	readl(&regs->LocalCtrl);
3053	mb();
3054	udelay(ACE_SHORT_DELAY);
3055	local |= EEPROM_DATA_OUT;
3056	writel(local, &regs->LocalCtrl);
3057	readl(&regs->LocalCtrl);
3058	mb();
3059	udelay(ACE_LONG_DELAY);
3060	local &= ~EEPROM_CLK_OUT;
3061	writel(local, &regs->LocalCtrl);
3062	mb();
3063}
3064
3065
3066/*
3067 * Read a whole byte from the EEPROM.
3068 */
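/*
 * This is the usual i2c "random read": START, write-select the device,
 * send the two address bytes, repeated START, read-select, then clock
 * in the eight data bits and finish with a STOP. A missing ACK at any
 * step aborts with -EIO.
 */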
3069static int read_eeprom_byte(struct net_device *dev, unsigned long offset)
3070{
3071	struct ace_private *ap = netdev_priv(dev);
3072	struct ace_regs __iomem *regs = ap->regs;
3073	unsigned long flags;
3074	u32 local;
3075	int result = 0;
3076	short i;
3077
3078	/*
3079	 * Don't take interrupts on this CPU while bit banging
3080	 * the %#%#@$ I2C device
3081	 */
3082	local_irq_save(flags);
3083
3084	eeprom_start(regs);
3085
3086	eeprom_prep(regs, EEPROM_WRITE_SELECT);
3087	if (eeprom_check_ack(regs)) {
3088		local_irq_restore(flags);
3089		printk(KERN_ERR "%s: Unable to sync eeprom\n", ap->name);
3090		result = -EIO;
3091		goto eeprom_read_error;
3092	}
3093
3094	eeprom_prep(regs, (offset >> 8) & 0xff);
3095	if (eeprom_check_ack(regs)) {
3096		local_irq_restore(flags);
3097		printk(KERN_ERR "%s: Unable to set address byte 0\n",
3098		       ap->name);
3099		result = -EIO;
3100		goto eeprom_read_error;
3101	}
3102
3103	eeprom_prep(regs, offset & 0xff);
3104	if (eeprom_check_ack(regs)) {
3105		local_irq_restore(flags);
3106		printk(KERN_ERR "%s: Unable to set address byte 1\n",
3107		       ap->name);
3108		result = -EIO;
3109		goto eeprom_read_error;
3110	}
3111
3112	eeprom_start(regs);
3113	eeprom_prep(regs, EEPROM_READ_SELECT);
3114	if (eeprom_check_ack(regs)) {
3115		local_irq_restore(flags);
3116		printk(KERN_ERR "%s: Unable to set READ_SELECT\n",
3117		       ap->name);
3118		result = -EIO;
3119		goto eeprom_read_error;
3120	}
3121
3122	for (i = 0; i < 8; i++) {
3123		local = readl(&regs->LocalCtrl);
3124		local &= ~EEPROM_WRITE_ENABLE;
3125		writel(local, &regs->LocalCtrl);
3126		readl(&regs->LocalCtrl);
3127		udelay(ACE_LONG_DELAY);
3128		mb();
3129		local |= EEPROM_CLK_OUT;
3130		writel(local, &regs->LocalCtrl);
3131		readl(&regs->LocalCtrl);
3132		mb();
3133		udelay(ACE_SHORT_DELAY);
3134		/* sample data mid high clk */
3135		result = (result << 1) |
3136			((readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0);
3137		udelay(ACE_SHORT_DELAY);
3138		mb();
3139		local = readl(&regs->LocalCtrl);
3140		local &= ~EEPROM_CLK_OUT;
3141		writel(local, &regs->LocalCtrl);
3142		readl(&regs->LocalCtrl);
3143		udelay(ACE_SHORT_DELAY);
3144		mb();
3145		if (i == 7) {
3146			local |= EEPROM_WRITE_ENABLE;
3147			writel(local, &regs->LocalCtrl);
3148			readl(&regs->LocalCtrl);
3149			mb();
3150			udelay(ACE_SHORT_DELAY);
3151		}
3152	}
3153
3154	local |= EEPROM_DATA_OUT;
3155	writel(local, &regs->LocalCtrl);
3156	readl(&regs->LocalCtrl);
3157	mb();
3158	udelay(ACE_SHORT_DELAY);
3159	writel(readl(&regs->LocalCtrl) | EEPROM_CLK_OUT, &regs->LocalCtrl);
3160	readl(&regs->LocalCtrl);
3161	udelay(ACE_LONG_DELAY);
3162	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
3163	readl(&regs->LocalCtrl);
3164	mb();
3165	udelay(ACE_SHORT_DELAY);
3166	eeprom_stop(regs);
3167
3168	local_irq_restore(flags);
3169 out:
3170	return result;
3171
3172 eeprom_read_error:
3173	printk(KERN_ERR "%s: Unable to read eeprom byte 0x%02lx\n",
3174	       ap->name, offset);
3175	goto out;
3176}
3177
3178module_pci_driver(acenic_pci_driver);
v4.17
 
   1/*
   2 * acenic.c: Linux driver for the Alteon AceNIC Gigabit Ethernet card
   3 *           and other Tigon based cards.
   4 *
   5 * Copyright 1998-2002 by Jes Sorensen, <jes@trained-monkey.org>.
   6 *
   7 * Thanks to Alteon and 3Com for providing hardware and documentation
   8 * enabling me to write this driver.
   9 *
  10 * A mailing list for discussing the use of this driver has been
  11 * setup, please subscribe to the lists if you have any questions
  12 * about the driver. Send mail to linux-acenic-help@sunsite.auc.dk to
  13 * see how to subscribe.
  14 *
  15 * This program is free software; you can redistribute it and/or modify
  16 * it under the terms of the GNU General Public License as published by
  17 * the Free Software Foundation; either version 2 of the License, or
  18 * (at your option) any later version.
  19 *
  20 * Additional credits:
  21 *   Pete Wyckoff <wyckoff@ca.sandia.gov>: Initial Linux/Alpha and trace
  22 *       dump support. The trace dump support has not been
  23 *       integrated yet however.
  24 *   Troy Benjegerdes: Big Endian (PPC) patches.
  25 *   Nate Stahl: Better out of memory handling and stats support.
  26 *   Aman Singla: Nasty race between interrupt handler and tx code dealing
  27 *                with 'testing the tx_ret_csm and setting tx_full'
  28 *   David S. Miller <davem@redhat.com>: conversion to new PCI dma mapping
  29 *                                       infrastructure and Sparc support
  30 *   Pierrick Pinasseau (CERN): For lending me an Ultra 5 to test the
  31 *                              driver under Linux/Sparc64
  32 *   Matt Domsch <Matt_Domsch@dell.com>: Detect Alteon 1000baseT cards
  33 *                                       ETHTOOL_GDRVINFO support
  34 *   Chip Salzenberg <chip@valinux.com>: Fix race condition between tx
  35 *                                       handler and close() cleanup.
  36 *   Ken Aaker <kdaaker@rchland.vnet.ibm.com>: Correct check for whether
  37 *                                       memory mapped IO is enabled to
  38 *                                       make the driver work on RS/6000.
  39 *   Takayoshi Kouchi <kouchi@hpc.bs1.fc.nec.co.jp>: Identifying problem
  40 *                                       where the driver would disable
  41 *                                       bus master mode if it had to disable
  42 *                                       write and invalidate.
  43 *   Stephen Hack <stephen_hack@hp.com>: Fixed ace_set_mac_addr for little
  44 *                                       endian systems.
  45 *   Val Henson <vhenson@esscom.com>:    Reset Jumbo skb producer and
  46 *                                       rx producer index when
  47 *                                       flushing the Jumbo ring.
  48 *   Hans Grobler <grobh@sun.ac.za>:     Memory leak fixes in the
  49 *                                       driver init path.
  50 *   Grant Grundler <grundler@cup.hp.com>: PCI write posting fixes.
  51 */
  52
  53#include <linux/module.h>
  54#include <linux/moduleparam.h>
  55#include <linux/types.h>
  56#include <linux/errno.h>
  57#include <linux/ioport.h>
  58#include <linux/pci.h>
  59#include <linux/dma-mapping.h>
  60#include <linux/kernel.h>
  61#include <linux/netdevice.h>
  62#include <linux/etherdevice.h>
  63#include <linux/skbuff.h>
  64#include <linux/delay.h>
  65#include <linux/mm.h>
  66#include <linux/highmem.h>
  67#include <linux/sockios.h>
  68#include <linux/firmware.h>
  69#include <linux/slab.h>
  70#include <linux/prefetch.h>
  71#include <linux/if_vlan.h>
  72
  73#ifdef SIOCETHTOOL
  74#include <linux/ethtool.h>
  75#endif
  76
  77#include <net/sock.h>
  78#include <net/ip.h>
  79
  80#include <asm/io.h>
  81#include <asm/irq.h>
  82#include <asm/byteorder.h>
  83#include <linux/uaccess.h>
  84
  85
  86#define DRV_NAME "acenic"
  87
  88#undef INDEX_DEBUG
  89
  90#ifdef CONFIG_ACENIC_OMIT_TIGON_I
  91#define ACE_IS_TIGON_I(ap)	0
  92#define ACE_TX_RING_ENTRIES(ap)	MAX_TX_RING_ENTRIES
  93#else
  94#define ACE_IS_TIGON_I(ap)	(ap->version == 1)
  95#define ACE_TX_RING_ENTRIES(ap)	ap->tx_ring_entries
  96#endif
  97
  98#ifndef PCI_VENDOR_ID_ALTEON
  99#define PCI_VENDOR_ID_ALTEON		0x12ae
 100#endif
 101#ifndef PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE
 102#define PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE  0x0001
 103#define PCI_DEVICE_ID_ALTEON_ACENIC_COPPER 0x0002
 104#endif
 105#ifndef PCI_DEVICE_ID_3COM_3C985
 106#define PCI_DEVICE_ID_3COM_3C985	0x0001
 107#endif
 108#ifndef PCI_VENDOR_ID_NETGEAR
 109#define PCI_VENDOR_ID_NETGEAR		0x1385
 110#define PCI_DEVICE_ID_NETGEAR_GA620	0x620a
 111#endif
 112#ifndef PCI_DEVICE_ID_NETGEAR_GA620T
 113#define PCI_DEVICE_ID_NETGEAR_GA620T	0x630a
 114#endif
 115
 116
 117/*
 118 * Farallon used the DEC vendor ID by mistake and they seem not
 119 * to care - stinky!
 120 */
 121#ifndef PCI_DEVICE_ID_FARALLON_PN9000SX
 122#define PCI_DEVICE_ID_FARALLON_PN9000SX	0x1a
 123#endif
 124#ifndef PCI_DEVICE_ID_FARALLON_PN9100T
 125#define PCI_DEVICE_ID_FARALLON_PN9100T  0xfa
 126#endif
 127#ifndef PCI_VENDOR_ID_SGI
 128#define PCI_VENDOR_ID_SGI		0x10a9
 129#endif
 130#ifndef PCI_DEVICE_ID_SGI_ACENIC
 131#define PCI_DEVICE_ID_SGI_ACENIC	0x0009
 132#endif
 133
 134static const struct pci_device_id acenic_pci_tbl[] = {
 135	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE,
 136	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 137	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_COPPER,
 138	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 139	{ PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C985,
 140	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 141	{ PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620,
 142	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 143	{ PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620T,
 144	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 145	/*
 146	 * Farallon used the DEC vendor ID on their cards incorrectly,
 147	 * then later Alteon's ID.
 148	 */
 149	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_FARALLON_PN9000SX,
 150	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 151	{ PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_FARALLON_PN9100T,
 152	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 153	{ PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_ACENIC,
 154	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, },
 155	{ }
 156};
 157MODULE_DEVICE_TABLE(pci, acenic_pci_tbl);
 158
 159#define ace_sync_irq(irq)	synchronize_irq(irq)
 160
 161#ifndef offset_in_page
 162#define offset_in_page(ptr)	((unsigned long)(ptr) & ~PAGE_MASK)
 163#endif
 164
 165#define ACE_MAX_MOD_PARMS	8
 166#define BOARD_IDX_STATIC	0
 167#define BOARD_IDX_OVERFLOW	-1
 168
 169#include "acenic.h"
 170
 171/*
 172 * These must be defined before the firmware is included.
 173 */
 174#define MAX_TEXT_LEN	96*1024
 175#define MAX_RODATA_LEN	8*1024
 176#define MAX_DATA_LEN	2*1024
 177
 178#ifndef tigon2FwReleaseLocal
 179#define tigon2FwReleaseLocal 0
 180#endif
 181
 182/*
 183 * This driver currently supports Tigon I and Tigon II based cards
 184 * including the Alteon AceNIC, the 3Com 3C985[B] and NetGear
 185 * GA620. The driver should also work on the SGI, DEC and Farallon
 186 * versions of the card, however I have not been able to test that
 187 * myself.
 188 *
 189 * This card is really neat, it supports receive hardware checksumming
 190 * and jumbo frames (up to 9000 bytes) and does a lot of work in the
 191 * firmware. Also the programming interface is quite neat, except for
 192 * the parts dealing with the i2c eeprom on the card ;-)
 193 *
 194 * Using jumbo frames:
 195 *
 196 * To enable jumbo frames, simply specify an mtu between 1500 and 9000
 197 * bytes to ifconfig. Jumbo frames can be enabled or disabled at any time
 198 * by running `ifconfig eth<X> mtu <MTU>' with <X> being the Ethernet
 199 * interface number and <MTU> being the MTU value.
 200 *
 201 * Module parameters:
 202 *
 203 * When compiled as a loadable module, the driver allows for a number
 204 * of module parameters to be specified. The driver supports the
 205 * following module parameters:
 206 *
 207 *  trace=<val> - Firmware trace level. This requires special traced
 208 *                firmware to replace the firmware supplied with
 209 *                the driver - for debugging purposes only.
 210 *
 211 *  link=<val>  - Link state. Normally you want to use the default link
 212 *                parameters set by the driver. This can be used to
 213 *                override these in case your switch doesn't negotiate
 214 *                the link properly. Valid values are:
 215 *         0x0001 - Force half duplex link.
 216 *         0x0002 - Do not negotiate line speed with the other end.
 217 *         0x0010 - 10Mbit/sec link.
 218 *         0x0020 - 100Mbit/sec link.
 219 *         0x0040 - 1000Mbit/sec link.
 220 *         0x0100 - Do not negotiate flow control.
 221 *         0x0200 - Enable RX flow control Y
 222 *         0x0400 - Enable TX flow control Y (Tigon II NICs only).
 223 *                Default value is 0x0270, ie. enable link+flow
 224 *                control negotiation. Negotiating the highest
 225 *                possible link speed with RX flow control enabled.
 226 *
 227 *                When disabling link speed negotiation, only one link
 228 *                speed is allowed to be specified!
 229 *
 230 *  tx_coal_tick=<val> - number of coalescing clock ticks (us) allowed
 231 *                to wait for more packets to arive before
 232 *                interrupting the host, from the time the first
 233 *                packet arrives.
 234 *
 235 *  rx_coal_tick=<val> - number of coalescing clock ticks (us) allowed
 236 *                to wait for more packets to arive in the transmit ring,
 237 *                before interrupting the host, after transmitting the
 238 *                first packet in the ring.
 239 *
 240 *  max_tx_desc=<val> - maximum number of transmit descriptors
 241 *                (packets) transmitted before interrupting the host.
 242 *
 243 *  max_rx_desc=<val> - maximum number of receive descriptors
 244 *                (packets) received before interrupting the host.
 245 *
 246 *  tx_ratio=<val> - 7 bit value (0 - 63) specifying the split in 64th
 247 *                increments of the NIC's on board memory to be used for
 248 *                transmit and receive buffers. For the 1MB NIC app. 800KB
 249 *                is available, on the 1/2MB NIC app. 300KB is available.
 250 *                68KB will always be available as a minimum for both
 251 *                directions. The default value is a 50/50 split.
 252 *  dis_pci_mem_inval=<val> - disable PCI memory write and invalidate
 253 *                operations, default (1) is to always disable this as
 254 *                that is what Alteon does on NT. I have not been able
 255 *                to measure any real performance differences with
 256 *                this on my systems. Set <val>=0 if you want to
 257 *                enable these operations.
 258 *
 259 * If you use more than one NIC, specify the parameters for the
 260 * individual NICs with a comma, ie. trace=0,0x00001fff,0 you want to
 261 * run tracing on NIC #2 but not on NIC #1 and #3.
 262 *
 263 * TODO:
 264 *
 265 * - Proper multicast support.
 266 * - NIC dump support.
 267 * - More tuning parameters.
 268 *
 269 * The mini ring is not used under Linux and I am not sure it makes sense
 270 * to actually use it.
 271 *
 272 * New interrupt handler strategy:
 273 *
 274 * The old interrupt handler worked using the traditional method of
 275 * replacing an skbuff with a new one when a packet arrives. However
 276 * the rx rings do not need to contain a static number of buffer
 277 * descriptors, thus it makes sense to move the memory allocation out
 278 * of the main interrupt handler and do it in a bottom half handler
 279 * and only allocate new buffers when the number of buffers in the
 280 * ring is below a certain threshold. In order to avoid starving the
 281 * NIC under heavy load it is however necessary to force allocation
 282 * when hitting a minimum threshold. The strategy for alloction is as
 283 * follows:
 284 *
 285 *     RX_LOW_BUF_THRES    - allocate buffers in the bottom half
 286 *     RX_PANIC_LOW_THRES  - we are very low on buffers, allocate
 287 *                           the buffers in the interrupt handler
 288 *     RX_RING_THRES       - maximum number of buffers in the rx ring
 289 *     RX_MINI_THRES       - maximum number of buffers in the mini ring
 290 *     RX_JUMBO_THRES      - maximum number of buffers in the jumbo ring
 291 *
 292 * One advantagous side effect of this allocation approach is that the
 293 * entire rx processing can be done without holding any spin lock
 294 * since the rx rings and registers are totally independent of the tx
 295 * ring and its registers.  This of course includes the kmalloc's of
 296 * new skb's. Thus start_xmit can run in parallel with rx processing
 297 * and the memory allocation on SMP systems.
 298 *
 299 * Note that running the skb reallocation in a bottom half opens up
 300 * another can of races which needs to be handled properly. In
 301 * particular it can happen that the interrupt handler tries to run
 302 * the reallocation while the bottom half is either running on another
 303 * CPU or was interrupted on the same CPU. To get around this the
 304 * driver uses bitops to prevent the reallocation routines from being
 305 * reentered.
 306 *
 307 * TX handling can also be done without holding any spin lock, wheee
 308 * this is fun! since tx_ret_csm is only written to by the interrupt
 309 * handler. The case to be aware of is when shutting down the device
 310 * and cleaning up where it is necessary to make sure that
 311 * start_xmit() is not running while this is happening. Well DaveM
 312 * informs me that this case is already protected against ... bye bye
 313 * Mr. Spin Lock, it was nice to know you.
 314 *
 315 * TX interrupts are now partly disabled so the NIC will only generate
 316 * TX interrupts for the number of coal ticks, not for the number of
 317 * TX packets in the queue. This should reduce the number of TX-only
 318 * interrupts, ie. interrupts seen when no RX processing is done.
 319 */
 320
 321/*
 322 * Threshold values for RX buffer allocation - the low water marks for
 323 * when to start refilling the rings are set to 75% of the ring
 324 * sizes. It seems to make sense to refill the rings entirely from the
 325 * interrupt handler once it gets below the panic threshold, that way
 326 * we don't risk that the refilling is moved to another CPU when the
 327 * one running the interrupt handler just got the slab code hot in its
 328 * cache.
 329 */
 330#define RX_RING_SIZE		72
 331#define RX_MINI_SIZE		64
 332#define RX_JUMBO_SIZE		48
 333
 334#define RX_PANIC_STD_THRES	16
 335#define RX_PANIC_STD_REFILL	(3*RX_PANIC_STD_THRES)/2
 336#define RX_LOW_STD_THRES	(3*RX_RING_SIZE)/4
 337#define RX_PANIC_MINI_THRES	12
 338#define RX_PANIC_MINI_REFILL	(3*RX_PANIC_MINI_THRES)/2
 339#define RX_LOW_MINI_THRES	(3*RX_MINI_SIZE)/4
 340#define RX_PANIC_JUMBO_THRES	6
 341#define RX_PANIC_JUMBO_REFILL	(3*RX_PANIC_JUMBO_THRES)/2
 342#define RX_LOW_JUMBO_THRES	(3*RX_JUMBO_SIZE)/4
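
    /*
     * Worked example, derived from the values above: with RX_RING_SIZE
     * at 72, the standard ring is normally topped back up to 72 buffers
     * by the tasklet once it drops below RX_LOW_STD_THRES = (3*72)/4 =
     * 54, while the interrupt handler refills it directly once it falls
     * below RX_PANIC_STD_THRES = 16, to avoid starving the NIC.
     */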
 343
 344
 345/*
 346 * Size of the mini ring entries; basically these just need to be big
 347 * enough to take TCP ACKs
 348 */
 349#define ACE_MINI_SIZE		100
 350
 351#define ACE_MINI_BUFSIZE	ACE_MINI_SIZE
 352#define ACE_STD_BUFSIZE		(ACE_STD_MTU + ETH_HLEN + 4)
 353#define ACE_JUMBO_BUFSIZE	(ACE_JUMBO_MTU + ETH_HLEN + 4)
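
    /*
     * For reference: a plain (untagged, option-free) IPv4 TCP ACK is
     * 14 (Ethernet) + 20 (IP) + 20 (TCP) = 54 bytes, so the 100 byte
     * mini buffers above leave room for a VLAN tag and a modest amount
     * of IP/TCP options.
     */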
 354
 355/*
 356 * There seems to be a magic difference in the effect between 995 and 996
 357 * but little difference between 900 and 995 ... no idea why.
 358 *
 359 * There is now a default set of tuning parameters which is set, depending
 360 * on whether or not the user enables Jumbo frames. It's assumed that if
 361 * Jumbo frames are enabled, the user wants optimal tuning for that case.
 362 */
 363#define DEF_TX_COAL		400 /* 996 */
 364#define DEF_TX_MAX_DESC		60  /* was 40 */
 365#define DEF_RX_COAL		120 /* 1000 */
 366#define DEF_RX_MAX_DESC		25
 367#define DEF_TX_RATIO		21 /* 24 */
 368
 369#define DEF_JUMBO_TX_COAL	20
 370#define DEF_JUMBO_TX_MAX_DESC	60
 371#define DEF_JUMBO_RX_COAL	30
 372#define DEF_JUMBO_RX_MAX_DESC	6
 373#define DEF_JUMBO_TX_RATIO	21
 374
 375#if tigon2FwReleaseLocal < 20001118
 376/*
 377 * Standard firmware and early modifications duplicate
 378 * IRQ load without this flag (coal timer is never reset).
 379 * Note that with this flag tx_coal should be less than
 380 * time to xmit full tx ring.
 381 * 400usec is not so bad for tx ring size of 128.
 382 */
 383#define TX_COAL_INTS_ONLY	1	/* worth it */
 384#else
 385/*
 386 * With modified firmware, this is not necessary, but still useful.
 387 */
 388#define TX_COAL_INTS_ONLY	1
 389#endif
 390
 391#define DEF_TRACE		0
 392#define DEF_STAT		(2 * TICKS_PER_SEC)
 393
 394
 395static int link_state[ACE_MAX_MOD_PARMS];
 396static int trace[ACE_MAX_MOD_PARMS];
 397static int tx_coal_tick[ACE_MAX_MOD_PARMS];
 398static int rx_coal_tick[ACE_MAX_MOD_PARMS];
 399static int max_tx_desc[ACE_MAX_MOD_PARMS];
 400static int max_rx_desc[ACE_MAX_MOD_PARMS];
 401static int tx_ratio[ACE_MAX_MOD_PARMS];
 402static int dis_pci_mem_inval[ACE_MAX_MOD_PARMS] = {1, 1, 1, 1, 1, 1, 1, 1};
 403
 404MODULE_AUTHOR("Jes Sorensen <jes@trained-monkey.org>");
 405MODULE_LICENSE("GPL");
 406MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver");
 407#ifndef CONFIG_ACENIC_OMIT_TIGON_I
 408MODULE_FIRMWARE("acenic/tg1.bin");
 409#endif
 410MODULE_FIRMWARE("acenic/tg2.bin");
 411
 412module_param_array_named(link, link_state, int, NULL, 0);
 413module_param_array(trace, int, NULL, 0);
 414module_param_array(tx_coal_tick, int, NULL, 0);
 415module_param_array(max_tx_desc, int, NULL, 0);
 416module_param_array(rx_coal_tick, int, NULL, 0);
 417module_param_array(max_rx_desc, int, NULL, 0);
 418module_param_array(tx_ratio, int, NULL, 0);
 419MODULE_PARM_DESC(link, "AceNIC/3C985/NetGear link state");
 420MODULE_PARM_DESC(trace, "AceNIC/3C985/NetGear firmware trace level");
 421MODULE_PARM_DESC(tx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait after the first tx descriptor arrives");
 422MODULE_PARM_DESC(max_tx_desc, "AceNIC/3C985/GA620 max number of transmit descriptors to wait for");
 423MODULE_PARM_DESC(rx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait after the first rx descriptor arrives");
 424MODULE_PARM_DESC(max_rx_desc, "AceNIC/3C985/GA620 max number of receive descriptors to wait for");
 425MODULE_PARM_DESC(tx_ratio, "AceNIC/3C985/GA620 ratio of NIC memory used for TX/RX descriptors (range 0-63)");
 426
 427
 428static const char version[] =
 429  "acenic.c: v0.92 08/05/2002  Jes Sorensen, linux-acenic@SunSITE.dk\n"
 430  "                            http://home.cern.ch/~jes/gige/acenic.html\n";
 431
 432static int ace_get_link_ksettings(struct net_device *,
 433				  struct ethtool_link_ksettings *);
 434static int ace_set_link_ksettings(struct net_device *,
 435				  const struct ethtool_link_ksettings *);
 436static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 437
 438static const struct ethtool_ops ace_ethtool_ops = {
 439	.get_drvinfo = ace_get_drvinfo,
 440	.get_link_ksettings = ace_get_link_ksettings,
 441	.set_link_ksettings = ace_set_link_ksettings,
 442};
 443
 444static void ace_watchdog(struct net_device *dev);
 445
 446static const struct net_device_ops ace_netdev_ops = {
 447	.ndo_open		= ace_open,
 448	.ndo_stop		= ace_close,
 449	.ndo_tx_timeout		= ace_watchdog,
 450	.ndo_get_stats		= ace_get_stats,
 451	.ndo_start_xmit		= ace_start_xmit,
 452	.ndo_set_rx_mode	= ace_set_multicast_list,
 453	.ndo_validate_addr	= eth_validate_addr,
 454	.ndo_set_mac_address	= ace_set_mac_addr,
 455	.ndo_change_mtu		= ace_change_mtu,
 456};
 457
 458static int acenic_probe_one(struct pci_dev *pdev,
 459			    const struct pci_device_id *id)
 460{
 461	struct net_device *dev;
 462	struct ace_private *ap;
 463	static int boards_found;
 464
 465	dev = alloc_etherdev(sizeof(struct ace_private));
 466	if (dev == NULL)
 467		return -ENOMEM;
 468
 469	SET_NETDEV_DEV(dev, &pdev->dev);
 470
 471	ap = netdev_priv(dev);
 472	ap->pdev = pdev;
 473	ap->name = pci_name(pdev);
 474
 475	dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
 476	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 477
 478	dev->watchdog_timeo = 5*HZ;
 479	dev->min_mtu = 0;
 480	dev->max_mtu = ACE_JUMBO_MTU;
 481
 482	dev->netdev_ops = &ace_netdev_ops;
 483	dev->ethtool_ops = &ace_ethtool_ops;
 484
 485	/* we only display this string ONCE */
 486	if (!boards_found)
 487		printk(version);
 488
 489	if (pci_enable_device(pdev))
 490		goto fail_free_netdev;
 491
 492	/*
 493	 * Enable master mode before we start playing with the
 494	 * pci_command word since pci_set_master() will modify
 495	 * it.
 496	 */
 497	pci_set_master(pdev);
 498
 499	pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command);
 500
 501	/* OpenFirmware on Mac's does not set this - DOH.. */
 502	if (!(ap->pci_command & PCI_COMMAND_MEMORY)) {
 503		printk(KERN_INFO "%s: Enabling PCI Memory Mapped "
 504		       "access - was not enabled by BIOS/Firmware\n",
 505		       ap->name);
 506		ap->pci_command = ap->pci_command | PCI_COMMAND_MEMORY;
 507		pci_write_config_word(ap->pdev, PCI_COMMAND,
 508				      ap->pci_command);
 509		wmb();
 510	}
 511
 512	pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &ap->pci_latency);
 513	if (ap->pci_latency <= 0x40) {
 514		ap->pci_latency = 0x40;
 515		pci_write_config_byte(pdev, PCI_LATENCY_TIMER, ap->pci_latency);
 516	}
 517
 518	/*
 519	 * Remap the regs into kernel space - this is abuse of
 520	 * dev->base_addr since it was meant for I/O port
 521	 * addresses but who gives a damn.
 522	 */
 523	dev->base_addr = pci_resource_start(pdev, 0);
 524	ap->regs = ioremap(dev->base_addr, 0x4000);
 525	if (!ap->regs) {
 526		printk(KERN_ERR "%s:  Unable to map I/O register, "
 527		       "AceNIC %i will be disabled.\n",
 528		       ap->name, boards_found);
 529		goto fail_free_netdev;
 530	}
 531
 532	switch(pdev->vendor) {
 533	case PCI_VENDOR_ID_ALTEON:
 534		if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T) {
 535			printk(KERN_INFO "%s: Farallon PN9100-T ",
 536			       ap->name);
 537		} else {
 538			printk(KERN_INFO "%s: Alteon AceNIC ",
 539			       ap->name);
 540		}
 541		break;
 542	case PCI_VENDOR_ID_3COM:
 543		printk(KERN_INFO "%s: 3Com 3C985 ", ap->name);
 544		break;
 545	case PCI_VENDOR_ID_NETGEAR:
 546		printk(KERN_INFO "%s: NetGear GA620 ", ap->name);
 547		break;
 548	case PCI_VENDOR_ID_DEC:
 549		if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX) {
 550			printk(KERN_INFO "%s: Farallon PN9000-SX ",
 551			       ap->name);
 552			break;
 553		}
 554	case PCI_VENDOR_ID_SGI:
 555		printk(KERN_INFO "%s: SGI AceNIC ", ap->name);
 556		break;
 557	default:
 558		printk(KERN_INFO "%s: Unknown AceNIC ", ap->name);
 559		break;
 560	}
 561
 562	printk("Gigabit Ethernet at 0x%08lx, ", dev->base_addr);
 563	printk("irq %d\n", pdev->irq);
 564
 565#ifdef CONFIG_ACENIC_OMIT_TIGON_I
 566	if ((readl(&ap->regs->HostCtrl) >> 28) == 4) {
 567		printk(KERN_ERR "%s: Driver compiled without Tigon I"
 568		       " support - NIC disabled\n", dev->name);
 569		goto fail_uninit;
 570	}
 571#endif
 572
 573	if (ace_allocate_descriptors(dev))
 574		goto fail_free_netdev;
 575
 576#ifdef MODULE
 577	if (boards_found >= ACE_MAX_MOD_PARMS)
 578		ap->board_idx = BOARD_IDX_OVERFLOW;
 579	else
 580		ap->board_idx = boards_found;
 581#else
 582	ap->board_idx = BOARD_IDX_STATIC;
 583#endif
 584
 585	if (ace_init(dev))
 586		goto fail_free_netdev;
 587
 588	if (register_netdev(dev)) {
 589		printk(KERN_ERR "acenic: device registration failed\n");
 590		goto fail_uninit;
 591	}
 592	ap->name = dev->name;
 593
 594	if (ap->pci_using_dac)
 595		dev->features |= NETIF_F_HIGHDMA;
 596
 597	pci_set_drvdata(pdev, dev);
 598
 599	boards_found++;
 600	return 0;
 601
 602 fail_uninit:
 603	ace_init_cleanup(dev);
 604 fail_free_netdev:
 605	free_netdev(dev);
 606	return -ENODEV;
 607}
 608
 609static void acenic_remove_one(struct pci_dev *pdev)
 610{
 611	struct net_device *dev = pci_get_drvdata(pdev);
 612	struct ace_private *ap = netdev_priv(dev);
 613	struct ace_regs __iomem *regs = ap->regs;
 614	short i;
 615
 616	unregister_netdev(dev);
 617
 618	writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
 619	if (ap->version >= 2)
 620		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 621
 622	/*
 623	 * This clears any pending interrupts
 624	 */
 625	writel(1, &regs->Mb0Lo);
 626	readl(&regs->CpuCtrl);	/* flush */
 627
 628	/*
 629	 * Make sure no other CPUs are processing interrupts
 630	 * on the card before the buffers are being released.
 631	 * Otherwise one might experience some `interesting'
 632	 * effects.
 633	 *
 634	 * Then release the RX buffers - jumbo buffers were
 635	 * already released in ace_close().
 636	 */
 637	ace_sync_irq(dev->irq);
 638
 639	for (i = 0; i < RX_STD_RING_ENTRIES; i++) {
 640		struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb;
 641
 642		if (skb) {
 643			struct ring_info *ringp;
 644			dma_addr_t mapping;
 645
 646			ringp = &ap->skb->rx_std_skbuff[i];
 647			mapping = dma_unmap_addr(ringp, mapping);
 648			pci_unmap_page(ap->pdev, mapping,
 649				       ACE_STD_BUFSIZE,
 650				       PCI_DMA_FROMDEVICE);
 651
 652			ap->rx_std_ring[i].size = 0;
 653			ap->skb->rx_std_skbuff[i].skb = NULL;
 654			dev_kfree_skb(skb);
 655		}
 656	}
 657
 658	if (ap->version >= 2) {
 659		for (i = 0; i < RX_MINI_RING_ENTRIES; i++) {
 660			struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb;
 661
 662			if (skb) {
 663				struct ring_info *ringp;
 664				dma_addr_t mapping;
 665
 666				ringp = &ap->skb->rx_mini_skbuff[i];
 667				mapping = dma_unmap_addr(ringp,mapping);
 668				pci_unmap_page(ap->pdev, mapping,
 669					       ACE_MINI_BUFSIZE,
 670					       PCI_DMA_FROMDEVICE);
 671
 672				ap->rx_mini_ring[i].size = 0;
 673				ap->skb->rx_mini_skbuff[i].skb = NULL;
 674				dev_kfree_skb(skb);
 675			}
 676		}
 677	}
 678
 679	for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) {
 680		struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb;
 681		if (skb) {
 682			struct ring_info *ringp;
 683			dma_addr_t mapping;
 684
 685			ringp = &ap->skb->rx_jumbo_skbuff[i];
 686			mapping = dma_unmap_addr(ringp, mapping);
 687			pci_unmap_page(ap->pdev, mapping,
 688				       ACE_JUMBO_BUFSIZE,
 689				       PCI_DMA_FROMDEVICE);
 690
 691			ap->rx_jumbo_ring[i].size = 0;
 692			ap->skb->rx_jumbo_skbuff[i].skb = NULL;
 693			dev_kfree_skb(skb);
 694		}
 695	}
 696
 697	ace_init_cleanup(dev);
 698	free_netdev(dev);
 699}
 700
 701static struct pci_driver acenic_pci_driver = {
 702	.name		= "acenic",
 703	.id_table	= acenic_pci_tbl,
 704	.probe		= acenic_probe_one,
 705	.remove		= acenic_remove_one,
 706};
 707
 708static void ace_free_descriptors(struct net_device *dev)
 709{
 710	struct ace_private *ap = netdev_priv(dev);
 711	int size;
 712
 713	if (ap->rx_std_ring != NULL) {
 714		size = (sizeof(struct rx_desc) *
 715			(RX_STD_RING_ENTRIES +
 716			 RX_JUMBO_RING_ENTRIES +
 717			 RX_MINI_RING_ENTRIES +
 718			 RX_RETURN_RING_ENTRIES));
 719		pci_free_consistent(ap->pdev, size, ap->rx_std_ring,
 720				    ap->rx_ring_base_dma);
 721		ap->rx_std_ring = NULL;
 722		ap->rx_jumbo_ring = NULL;
 723		ap->rx_mini_ring = NULL;
 724		ap->rx_return_ring = NULL;
 725	}
 726	if (ap->evt_ring != NULL) {
 727		size = (sizeof(struct event) * EVT_RING_ENTRIES);
 728		pci_free_consistent(ap->pdev, size, ap->evt_ring,
 729				    ap->evt_ring_dma);
 730		ap->evt_ring = NULL;
 731	}
 732	if (ap->tx_ring != NULL && !ACE_IS_TIGON_I(ap)) {
 733		size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES);
 734		pci_free_consistent(ap->pdev, size, ap->tx_ring,
 735				    ap->tx_ring_dma);
 736	}
 737	ap->tx_ring = NULL;
 738
 739	if (ap->evt_prd != NULL) {
 740		pci_free_consistent(ap->pdev, sizeof(u32),
 741				    (void *)ap->evt_prd, ap->evt_prd_dma);
 742		ap->evt_prd = NULL;
 743	}
 744	if (ap->rx_ret_prd != NULL) {
 745		pci_free_consistent(ap->pdev, sizeof(u32),
 746				    (void *)ap->rx_ret_prd,
 747				    ap->rx_ret_prd_dma);
 748		ap->rx_ret_prd = NULL;
 749	}
 750	if (ap->tx_csm != NULL) {
 751		pci_free_consistent(ap->pdev, sizeof(u32),
 752				    (void *)ap->tx_csm, ap->tx_csm_dma);
 753		ap->tx_csm = NULL;
 754	}
 755}
 756
 757
 758static int ace_allocate_descriptors(struct net_device *dev)
 759{
 760	struct ace_private *ap = netdev_priv(dev);
 761	int size;
 762
 763	size = (sizeof(struct rx_desc) *
 764		(RX_STD_RING_ENTRIES +
 765		 RX_JUMBO_RING_ENTRIES +
 766		 RX_MINI_RING_ENTRIES +
 767		 RX_RETURN_RING_ENTRIES));
 768
 769	ap->rx_std_ring = pci_alloc_consistent(ap->pdev, size,
 770					       &ap->rx_ring_base_dma);
 771	if (ap->rx_std_ring == NULL)
 772		goto fail;
 773
 774	ap->rx_jumbo_ring = ap->rx_std_ring + RX_STD_RING_ENTRIES;
 775	ap->rx_mini_ring = ap->rx_jumbo_ring + RX_JUMBO_RING_ENTRIES;
 776	ap->rx_return_ring = ap->rx_mini_ring + RX_MINI_RING_ENTRIES;
 777
 778	size = (sizeof(struct event) * EVT_RING_ENTRIES);
 779
 780	ap->evt_ring = pci_alloc_consistent(ap->pdev, size, &ap->evt_ring_dma);
 781
 782	if (ap->evt_ring == NULL)
 783		goto fail;
 784
 785	/*
 786	 * Only allocate a host TX ring for the Tigon II, the Tigon I
 787	 * has to use PCI registers for this ;-(
 788	 */
 789	if (!ACE_IS_TIGON_I(ap)) {
 790		size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES);
 791
 792		ap->tx_ring = pci_alloc_consistent(ap->pdev, size,
 793						   &ap->tx_ring_dma);
 794
 795		if (ap->tx_ring == NULL)
 796			goto fail;
 797	}
 798
 799	ap->evt_prd = pci_alloc_consistent(ap->pdev, sizeof(u32),
 800					   &ap->evt_prd_dma);
 801	if (ap->evt_prd == NULL)
 802		goto fail;
 803
 804	ap->rx_ret_prd = pci_alloc_consistent(ap->pdev, sizeof(u32),
 805					      &ap->rx_ret_prd_dma);
 806	if (ap->rx_ret_prd == NULL)
 807		goto fail;
 808
 809	ap->tx_csm = pci_alloc_consistent(ap->pdev, sizeof(u32),
 810					  &ap->tx_csm_dma);
 811	if (ap->tx_csm == NULL)
 812		goto fail;
 813
 814	return 0;
 815
 816fail:
 817	/* Clean up. */
 818	ace_init_cleanup(dev);
 819	return 1;
 820}
 821
 822
 823/*
 824 * Generic cleanup handling data allocated during init. Used when the
 825 * module is unloaded or if an error occurs during initialization
 826 */
 827static void ace_init_cleanup(struct net_device *dev)
 828{
 829	struct ace_private *ap;
 830
 831	ap = netdev_priv(dev);
 832
 833	ace_free_descriptors(dev);
 834
 835	if (ap->info)
 836		pci_free_consistent(ap->pdev, sizeof(struct ace_info),
 837				    ap->info, ap->info_dma);
 838	kfree(ap->skb);
 839	kfree(ap->trace_buf);
 840
 841	if (dev->irq)
 842		free_irq(dev->irq, dev);
 843
 844	iounmap(ap->regs);
 845}
 846
 847
 848/*
 849 * Commands are considered to be slow.
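     *
     * Each command is written into the NIC's command ring at the
     * current producer index, and the producer index is then advanced
     * (modulo CMD_RING_ENTRIES); the firmware consumes commands from
     * the ring asynchronously.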
 850 */
 851static inline void ace_issue_cmd(struct ace_regs __iomem *regs, struct cmd *cmd)
 852{
 853	u32 idx;
 854
 855	idx = readl(&regs->CmdPrd);
 856
 857	writel(*(u32 *)(cmd), &regs->CmdRng[idx]);
 858	idx = (idx + 1) % CMD_RING_ENTRIES;
 859
 860	writel(idx, &regs->CmdPrd);
 861}
 862
 863
 864static int ace_init(struct net_device *dev)
 865{
 866	struct ace_private *ap;
 867	struct ace_regs __iomem *regs;
 868	struct ace_info *info = NULL;
 869	struct pci_dev *pdev;
 870	unsigned long myjif;
 871	u64 tmp_ptr;
 872	u32 tig_ver, mac1, mac2, tmp, pci_state;
 873	int board_idx, ecode = 0;
 874	short i;
 875	unsigned char cache_size;
 876
 877	ap = netdev_priv(dev);
 878	regs = ap->regs;
 879
 880	board_idx = ap->board_idx;
 881
 882	/*
 883	 * aman@sgi.com - it's useful to do a NIC reset here to
 884	 * address the `Firmware not running' problem subsequent
 885	 * to any crashes involving the NIC
 886	 */
 887	writel(HW_RESET | (HW_RESET << 24), &regs->HostCtrl);
 888	readl(&regs->HostCtrl);		/* PCI write posting */
 889	udelay(5);
 890
 891	/*
 892	 * Don't access any other registers before this point!
 893	 */
 894#ifdef __BIG_ENDIAN
 895	/*
 896	 * This will most likely need BYTE_SWAP once we switch
 897	 * to using __raw_writel()
 898	 */
 899	writel((WORD_SWAP | CLR_INT | ((WORD_SWAP | CLR_INT) << 24)),
 900	       &regs->HostCtrl);
 901#else
 902	writel((CLR_INT | WORD_SWAP | ((CLR_INT | WORD_SWAP) << 24)),
 903	       &regs->HostCtrl);
 904#endif
 905	readl(&regs->HostCtrl);		/* PCI write posting */
 906
 907	/*
 908	 * Stop the NIC CPU and clear pending interrupts
 909	 */
 910	writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
 911	readl(&regs->CpuCtrl);		/* PCI write posting */
 912	writel(0, &regs->Mb0Lo);
 913
 914	tig_ver = readl(&regs->HostCtrl) >> 28;
 915
 916	switch(tig_ver){
 917#ifndef CONFIG_ACENIC_OMIT_TIGON_I
 918	case 4:
 919	case 5:
 920		printk(KERN_INFO "  Tigon I  (Rev. %i), Firmware: %i.%i.%i, ",
 921		       tig_ver, ap->firmware_major, ap->firmware_minor,
 922		       ap->firmware_fix);
 923		writel(0, &regs->LocalCtrl);
 924		ap->version = 1;
 925		ap->tx_ring_entries = TIGON_I_TX_RING_ENTRIES;
 926		break;
 927#endif
 928	case 6:
 929		printk(KERN_INFO "  Tigon II (Rev. %i), Firmware: %i.%i.%i, ",
 930		       tig_ver, ap->firmware_major, ap->firmware_minor,
 931		       ap->firmware_fix);
 932		writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl);
 933		readl(&regs->CpuBCtrl);		/* PCI write posting */
 934		/*
 935		 * The SRAM bank size does _not_ indicate the amount
 936		 * of memory on the card, it controls the _bank_ size!
 937		 * Ie. a 1MB AceNIC will have two banks of 512KB.
 938		 */
 939		writel(SRAM_BANK_512K, &regs->LocalCtrl);
 940		writel(SYNC_SRAM_TIMING, &regs->MiscCfg);
 941		ap->version = 2;
 942		ap->tx_ring_entries = MAX_TX_RING_ENTRIES;
 943		break;
 944	default:
 945		printk(KERN_WARNING "  Unsupported Tigon version detected "
 946		       "(%i)\n", tig_ver);
 947		ecode = -ENODEV;
 948		goto init_error;
 949	}
 950
 951	/*
 952	 * ModeStat _must_ be set after the SRAM settings as this change
 953	 * seems to corrupt the ModeStat and possibly other registers.
 954	 * The SRAM settings survive resets and setting it to the same
 955	 * value a second time works as well. This is what caused the
 956	 * `Firmware not running' problem on the Tigon II.
 957	 */
 958#ifdef __BIG_ENDIAN
 959	writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL | ACE_BYTE_SWAP_BD |
 960	       ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
 961#else
 962	writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL |
 963	       ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, &regs->ModeStat);
 964#endif
 965	readl(&regs->ModeStat);		/* PCI write posting */
 966
 967	mac1 = 0;
 968	for(i = 0; i < 4; i++) {
 969		int t;
 970
 971		mac1 = mac1 << 8;
 972		t = read_eeprom_byte(dev, 0x8c+i);
 973		if (t < 0) {
 974			ecode = -EIO;
 975			goto init_error;
 976		} else
 977			mac1 |= (t & 0xff);
 978	}
 979	mac2 = 0;
 980	for(i = 4; i < 8; i++) {
 981		int t;
 982
 983		mac2 = mac2 << 8;
 984		t = read_eeprom_byte(dev, 0x8c+i);
 985		if (t < 0) {
 986			ecode = -EIO;
 987			goto init_error;
 988		} else
 989			mac2 |= (t & 0xff);
 990	}
 991
 992	writel(mac1, &regs->MacAddrHi);
 993	writel(mac2, &regs->MacAddrLo);
 994
 995	dev->dev_addr[0] = (mac1 >> 8) & 0xff;
 996	dev->dev_addr[1] = mac1 & 0xff;
 997	dev->dev_addr[2] = (mac2 >> 24) & 0xff;
 998	dev->dev_addr[3] = (mac2 >> 16) & 0xff;
 999	dev->dev_addr[4] = (mac2 >> 8) & 0xff;
1000	dev->dev_addr[5] = mac2 & 0xff;
1001
1002	printk("MAC: %pM\n", dev->dev_addr);
1003
1004	/*
 1005	 * Looks like this is something we need to deal with on all
 1006	 * architectures - even this %$#%$# N440BX Intel based thing doesn't
 1007	 * get it right. Ie. having two NICs in the machine, one will have
 1008	 * the cache line size set at boot time, the other will not.
1009	 */
1010	pdev = ap->pdev;
1011	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_size);
1012	cache_size <<= 2;
1013	if (cache_size != SMP_CACHE_BYTES) {
1014		printk(KERN_INFO "  PCI cache line size set incorrectly "
1015		       "(%i bytes) by BIOS/FW, ", cache_size);
1016		if (cache_size > SMP_CACHE_BYTES)
1017			printk("expecting %i\n", SMP_CACHE_BYTES);
1018		else {
1019			printk("correcting to %i\n", SMP_CACHE_BYTES);
1020			pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE,
1021					      SMP_CACHE_BYTES >> 2);
1022		}
1023	}
1024
1025	pci_state = readl(&regs->PciState);
1026	printk(KERN_INFO "  PCI bus width: %i bits, speed: %iMHz, "
1027	       "latency: %i clks\n",
1028	       	(pci_state & PCI_32BIT) ? 32 : 64,
1029		(pci_state & PCI_66MHZ) ? 66 : 33,
1030		ap->pci_latency);
1031
1032	/*
1033	 * Set the max DMA transfer size. Seems that for most systems
1034	 * the performance is better when no MAX parameter is
1035	 * set. However for systems enabling PCI write and invalidate,
1036	 * DMA writes must be set to the L1 cache line size to get
1037	 * optimal performance.
1038	 *
1039	 * The default is now to turn the PCI write and invalidate off
1040	 * - that is what Alteon does for NT.
1041	 */
1042	tmp = READ_CMD_MEM | WRITE_CMD_MEM;
1043	if (ap->version >= 2) {
1044		tmp |= (MEM_READ_MULTIPLE | (pci_state & PCI_66MHZ));
1045		/*
1046		 * Tuning parameters only supported for 8 cards
1047		 */
1048		if (board_idx == BOARD_IDX_OVERFLOW ||
1049		    dis_pci_mem_inval[board_idx]) {
1050			if (ap->pci_command & PCI_COMMAND_INVALIDATE) {
1051				ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
1052				pci_write_config_word(pdev, PCI_COMMAND,
1053						      ap->pci_command);
1054				printk(KERN_INFO "  Disabling PCI memory "
1055				       "write and invalidate\n");
1056			}
1057		} else if (ap->pci_command & PCI_COMMAND_INVALIDATE) {
1058			printk(KERN_INFO "  PCI memory write & invalidate "
1059			       "enabled by BIOS, enabling counter measures\n");
1060
1061			switch(SMP_CACHE_BYTES) {
1062			case 16:
1063				tmp |= DMA_WRITE_MAX_16;
1064				break;
1065			case 32:
1066				tmp |= DMA_WRITE_MAX_32;
1067				break;
1068			case 64:
1069				tmp |= DMA_WRITE_MAX_64;
1070				break;
1071			case 128:
1072				tmp |= DMA_WRITE_MAX_128;
1073				break;
1074			default:
1075				printk(KERN_INFO "  Cache line size %i not "
1076				       "supported, PCI write and invalidate "
1077				       "disabled\n", SMP_CACHE_BYTES);
1078				ap->pci_command &= ~PCI_COMMAND_INVALIDATE;
1079				pci_write_config_word(pdev, PCI_COMMAND,
1080						      ap->pci_command);
1081			}
1082		}
1083	}
1084
1085#ifdef __sparc__
1086	/*
1087	 * On this platform, we know what the best dma settings
1088	 * are.  We use 64-byte maximum bursts, because if we
1089	 * burst larger than the cache line size (or even cross
1090	 * a 64byte boundary in a single burst) the UltraSparc
1091	 * PCI controller will disconnect at 64-byte multiples.
1092	 *
1093	 * Read-multiple will be properly enabled above, and when
1094	 * set will give the PCI controller proper hints about
1095	 * prefetching.
1096	 */
1097	tmp &= ~DMA_READ_WRITE_MASK;
1098	tmp |= DMA_READ_MAX_64;
1099	tmp |= DMA_WRITE_MAX_64;
1100#endif
1101#ifdef __alpha__
1102	tmp &= ~DMA_READ_WRITE_MASK;
1103	tmp |= DMA_READ_MAX_128;
1104	/*
1105	 * All the docs say MUST NOT. Well, I did.
 1106	 * Nothing terrible happens if we load the wrong size.
 1107	 * But w&i still works better!
1108	 */
1109	tmp |= DMA_WRITE_MAX_128;
1110#endif
1111	writel(tmp, &regs->PciState);
1112
1113#if 0
1114	/*
1115	 * The Host PCI bus controller driver has to set FBB.
1116	 * If all devices on that PCI bus support FBB, then the controller
1117	 * can enable FBB support in the Host PCI Bus controller (or on
1118	 * the PCI-PCI bridge if that applies).
1119	 * -ggg
1120	 */
1121	/*
1122	 * I have received reports from people having problems when this
1123	 * bit is enabled.
1124	 */
1125	if (!(ap->pci_command & PCI_COMMAND_FAST_BACK)) {
1126		printk(KERN_INFO "  Enabling PCI Fast Back to Back\n");
1127		ap->pci_command |= PCI_COMMAND_FAST_BACK;
1128		pci_write_config_word(pdev, PCI_COMMAND, ap->pci_command);
1129	}
1130#endif
1131
1132	/*
1133	 * Configure DMA attributes.
1134	 */
1135	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
1136		ap->pci_using_dac = 1;
1137	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
1138		ap->pci_using_dac = 0;
1139	} else {
1140		ecode = -ENODEV;
1141		goto init_error;
1142	}
1143
1144	/*
1145	 * Initialize the generic info block and the command+event rings
1146	 * and the control blocks for the transmit and receive rings
 1147	 * as they need to be set up once and for all.
1148	 */
1149	if (!(info = pci_alloc_consistent(ap->pdev, sizeof(struct ace_info),
1150					  &ap->info_dma))) {
1151		ecode = -EAGAIN;
1152		goto init_error;
1153	}
1154	ap->info = info;
1155
1156	/*
1157	 * Get the memory for the skb rings.
1158	 */
1159	if (!(ap->skb = kmalloc(sizeof(struct ace_skb), GFP_KERNEL))) {
1160		ecode = -EAGAIN;
1161		goto init_error;
1162	}
1163
1164	ecode = request_irq(pdev->irq, ace_interrupt, IRQF_SHARED,
1165			    DRV_NAME, dev);
1166	if (ecode) {
1167		printk(KERN_WARNING "%s: Requested IRQ %d is busy\n",
1168		       DRV_NAME, pdev->irq);
1169		goto init_error;
1170	} else
1171		dev->irq = pdev->irq;
1172
1173#ifdef INDEX_DEBUG
1174	spin_lock_init(&ap->debug_lock);
1175	ap->last_tx = ACE_TX_RING_ENTRIES(ap) - 1;
1176	ap->last_std_rx = 0;
1177	ap->last_mini_rx = 0;
1178#endif
1179
1180	memset(ap->info, 0, sizeof(struct ace_info));
1181	memset(ap->skb, 0, sizeof(struct ace_skb));
1182
1183	ecode = ace_load_firmware(dev);
1184	if (ecode)
1185		goto init_error;
1186
1187	ap->fw_running = 0;
1188
1189	tmp_ptr = ap->info_dma;
1190	writel(tmp_ptr >> 32, &regs->InfoPtrHi);
1191	writel(tmp_ptr & 0xffffffff, &regs->InfoPtrLo);
1192
1193	memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event));
1194
1195	set_aceaddr(&info->evt_ctrl.rngptr, ap->evt_ring_dma);
1196	info->evt_ctrl.flags = 0;
1197
1198	*(ap->evt_prd) = 0;
1199	wmb();
1200	set_aceaddr(&info->evt_prd_ptr, ap->evt_prd_dma);
1201	writel(0, &regs->EvtCsm);
1202
1203	set_aceaddr(&info->cmd_ctrl.rngptr, 0x100);
1204	info->cmd_ctrl.flags = 0;
1205	info->cmd_ctrl.max_len = 0;
1206
1207	for (i = 0; i < CMD_RING_ENTRIES; i++)
1208		writel(0, &regs->CmdRng[i]);
1209
1210	writel(0, &regs->CmdPrd);
1211	writel(0, &regs->CmdCsm);
1212
1213	tmp_ptr = ap->info_dma;
1214	tmp_ptr += (unsigned long) &(((struct ace_info *)0)->s.stats);
1215	set_aceaddr(&info->stats2_ptr, (dma_addr_t) tmp_ptr);
1216
1217	set_aceaddr(&info->rx_std_ctrl.rngptr, ap->rx_ring_base_dma);
1218	info->rx_std_ctrl.max_len = ACE_STD_BUFSIZE;
1219	info->rx_std_ctrl.flags =
1220	  RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1221
1222	memset(ap->rx_std_ring, 0,
1223	       RX_STD_RING_ENTRIES * sizeof(struct rx_desc));
1224
1225	for (i = 0; i < RX_STD_RING_ENTRIES; i++)
1226		ap->rx_std_ring[i].flags = BD_FLG_TCP_UDP_SUM;
1227
1228	ap->rx_std_skbprd = 0;
1229	atomic_set(&ap->cur_rx_bufs, 0);
1230
1231	set_aceaddr(&info->rx_jumbo_ctrl.rngptr,
1232		    (ap->rx_ring_base_dma +
1233		     (sizeof(struct rx_desc) * RX_STD_RING_ENTRIES)));
1234	info->rx_jumbo_ctrl.max_len = 0;
1235	info->rx_jumbo_ctrl.flags =
1236	  RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1237
1238	memset(ap->rx_jumbo_ring, 0,
1239	       RX_JUMBO_RING_ENTRIES * sizeof(struct rx_desc));
1240
1241	for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++)
1242		ap->rx_jumbo_ring[i].flags = BD_FLG_TCP_UDP_SUM | BD_FLG_JUMBO;
1243
1244	ap->rx_jumbo_skbprd = 0;
1245	atomic_set(&ap->cur_jumbo_bufs, 0);
1246
1247	memset(ap->rx_mini_ring, 0,
1248	       RX_MINI_RING_ENTRIES * sizeof(struct rx_desc));
1249
1250	if (ap->version >= 2) {
1251		set_aceaddr(&info->rx_mini_ctrl.rngptr,
1252			    (ap->rx_ring_base_dma +
1253			     (sizeof(struct rx_desc) *
1254			      (RX_STD_RING_ENTRIES +
1255			       RX_JUMBO_RING_ENTRIES))));
1256		info->rx_mini_ctrl.max_len = ACE_MINI_SIZE;
1257		info->rx_mini_ctrl.flags =
1258		  RCB_FLG_TCP_UDP_SUM|RCB_FLG_NO_PSEUDO_HDR|RCB_FLG_VLAN_ASSIST;
1259
1260		for (i = 0; i < RX_MINI_RING_ENTRIES; i++)
1261			ap->rx_mini_ring[i].flags =
1262				BD_FLG_TCP_UDP_SUM | BD_FLG_MINI;
1263	} else {
1264		set_aceaddr(&info->rx_mini_ctrl.rngptr, 0);
1265		info->rx_mini_ctrl.flags = RCB_FLG_RNG_DISABLE;
1266		info->rx_mini_ctrl.max_len = 0;
1267	}
1268
1269	ap->rx_mini_skbprd = 0;
1270	atomic_set(&ap->cur_mini_bufs, 0);
1271
1272	set_aceaddr(&info->rx_return_ctrl.rngptr,
1273		    (ap->rx_ring_base_dma +
1274		     (sizeof(struct rx_desc) *
1275		      (RX_STD_RING_ENTRIES +
1276		       RX_JUMBO_RING_ENTRIES +
1277		       RX_MINI_RING_ENTRIES))));
1278	info->rx_return_ctrl.flags = 0;
1279	info->rx_return_ctrl.max_len = RX_RETURN_RING_ENTRIES;
1280
1281	memset(ap->rx_return_ring, 0,
1282	       RX_RETURN_RING_ENTRIES * sizeof(struct rx_desc));
1283
1284	set_aceaddr(&info->rx_ret_prd_ptr, ap->rx_ret_prd_dma);
1285	*(ap->rx_ret_prd) = 0;
1286
1287	writel(TX_RING_BASE, &regs->WinBase);
1288
1289	if (ACE_IS_TIGON_I(ap)) {
1290		ap->tx_ring = (__force struct tx_desc *) regs->Window;
1291		for (i = 0; i < (TIGON_I_TX_RING_ENTRIES
1292				 * sizeof(struct tx_desc)) / sizeof(u32); i++)
1293			writel(0, (__force void __iomem *)ap->tx_ring  + i * 4);
1294
1295		set_aceaddr(&info->tx_ctrl.rngptr, TX_RING_BASE);
1296	} else {
1297		memset(ap->tx_ring, 0,
1298		       MAX_TX_RING_ENTRIES * sizeof(struct tx_desc));
1299
1300		set_aceaddr(&info->tx_ctrl.rngptr, ap->tx_ring_dma);
1301	}
1302
1303	info->tx_ctrl.max_len = ACE_TX_RING_ENTRIES(ap);
1304	tmp = RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST;
1305
1306	/*
1307	 * The Tigon I does not like having the TX ring in host memory ;-(
1308	 */
1309	if (!ACE_IS_TIGON_I(ap))
1310		tmp |= RCB_FLG_TX_HOST_RING;
1311#if TX_COAL_INTS_ONLY
1312	tmp |= RCB_FLG_COAL_INT_ONLY;
1313#endif
1314	info->tx_ctrl.flags = tmp;
1315
1316	set_aceaddr(&info->tx_csm_ptr, ap->tx_csm_dma);
1317
1318	/*
1319	 * Potential item for tuning parameter
1320	 */
1321#if 0 /* NO */
1322	writel(DMA_THRESH_16W, &regs->DmaReadCfg);
1323	writel(DMA_THRESH_16W, &regs->DmaWriteCfg);
1324#else
1325	writel(DMA_THRESH_8W, &regs->DmaReadCfg);
1326	writel(DMA_THRESH_8W, &regs->DmaWriteCfg);
1327#endif
1328
1329	writel(0, &regs->MaskInt);
1330	writel(1, &regs->IfIdx);
1331#if 0
1332	/*
1333	 * McKinley boxes do not like us fiddling with AssistState
1334	 * this early
1335	 */
1336	writel(1, &regs->AssistState);
1337#endif
1338
1339	writel(DEF_STAT, &regs->TuneStatTicks);
1340	writel(DEF_TRACE, &regs->TuneTrace);
1341
1342	ace_set_rxtx_parms(dev, 0);
1343
1344	if (board_idx == BOARD_IDX_OVERFLOW) {
1345		printk(KERN_WARNING "%s: more than %i NICs detected, "
1346		       "ignoring module parameters!\n",
1347		       ap->name, ACE_MAX_MOD_PARMS);
1348	} else if (board_idx >= 0) {
1349		if (tx_coal_tick[board_idx])
1350			writel(tx_coal_tick[board_idx],
1351			       &regs->TuneTxCoalTicks);
1352		if (max_tx_desc[board_idx])
1353			writel(max_tx_desc[board_idx], &regs->TuneMaxTxDesc);
1354
1355		if (rx_coal_tick[board_idx])
1356			writel(rx_coal_tick[board_idx],
1357			       &regs->TuneRxCoalTicks);
1358		if (max_rx_desc[board_idx])
1359			writel(max_rx_desc[board_idx], &regs->TuneMaxRxDesc);
1360
1361		if (trace[board_idx])
1362			writel(trace[board_idx], &regs->TuneTrace);
1363
1364		if ((tx_ratio[board_idx] > 0) && (tx_ratio[board_idx] < 64))
1365			writel(tx_ratio[board_idx], &regs->TxBufRat);
1366	}
1367
1368	/*
1369	 * Default link parameters
1370	 */
1371	tmp = LNK_ENABLE | LNK_FULL_DUPLEX | LNK_1000MB | LNK_100MB |
1372		LNK_10MB | LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL | LNK_NEGOTIATE;
1373	if(ap->version >= 2)
1374		tmp |= LNK_TX_FLOW_CTL_Y;
1375
1376	/*
1377	 * Override link default parameters
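	 *
	 * As a worked example of the bit decoding below (the value is
	 * purely illustrative): link=0x42 clears LNK_NEGOTIATE and sets
	 * LNK_1000MB, i.e. it forces a non-negotiated 1000 Mbit link
	 * while leaving flow control negotiation (LNK_NEG_FCTL) enabled.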
1378	 */
1379	if ((board_idx >= 0) && link_state[board_idx]) {
1380		int option = link_state[board_idx];
1381
1382		tmp = LNK_ENABLE;
1383
1384		if (option & 0x01) {
1385			printk(KERN_INFO "%s: Setting half duplex link\n",
1386			       ap->name);
1387			tmp &= ~LNK_FULL_DUPLEX;
1388		}
1389		if (option & 0x02)
1390			tmp &= ~LNK_NEGOTIATE;
1391		if (option & 0x10)
1392			tmp |= LNK_10MB;
1393		if (option & 0x20)
1394			tmp |= LNK_100MB;
1395		if (option & 0x40)
1396			tmp |= LNK_1000MB;
1397		if ((option & 0x70) == 0) {
1398			printk(KERN_WARNING "%s: No media speed specified, "
1399			       "forcing auto negotiation\n", ap->name);
1400			tmp |= LNK_NEGOTIATE | LNK_1000MB |
1401				LNK_100MB | LNK_10MB;
1402		}
1403		if ((option & 0x100) == 0)
1404			tmp |= LNK_NEG_FCTL;
1405		else
1406			printk(KERN_INFO "%s: Disabling flow control "
1407			       "negotiation\n", ap->name);
1408		if (option & 0x200)
1409			tmp |= LNK_RX_FLOW_CTL_Y;
1410		if ((option & 0x400) && (ap->version >= 2)) {
1411			printk(KERN_INFO "%s: Enabling TX flow control\n",
1412			       ap->name);
1413			tmp |= LNK_TX_FLOW_CTL_Y;
1414		}
1415	}
1416
1417	ap->link = tmp;
1418	writel(tmp, &regs->TuneLink);
1419	if (ap->version >= 2)
1420		writel(tmp, &regs->TuneFastLink);
1421
1422	writel(ap->firmware_start, &regs->Pc);
1423
1424	writel(0, &regs->Mb0Lo);
1425
1426	/*
1427	 * Set tx_csm before we start receiving interrupts, otherwise
1428	 * the interrupt handler might think it is supposed to process
1429	 * tx ints before we are up and running, which may cause a null
1430	 * pointer access in the int handler.
1431	 */
1432	ap->cur_rx = 0;
1433	ap->tx_prd = *(ap->tx_csm) = ap->tx_ret_csm = 0;
1434
1435	wmb();
1436	ace_set_txprd(regs, ap, 0);
1437	writel(0, &regs->RxRetCsm);
1438
1439	/*
1440	 * Enable DMA engine now.
1441	 * If we do this sooner, Mckinley box pukes.
1442	 * I assume it's because Tigon II DMA engine wants to check
1443	 * *something* even before the CPU is started.
1444	 */
1445	writel(1, &regs->AssistState);  /* enable DMA */
1446
1447	/*
1448	 * Start the NIC CPU
1449	 */
1450	writel(readl(&regs->CpuCtrl) & ~(CPU_HALT|CPU_TRACE), &regs->CpuCtrl);
1451	readl(&regs->CpuCtrl);
1452
1453	/*
1454	 * Wait for the firmware to spin up - max 3 seconds.
1455	 */
1456	myjif = jiffies + 3 * HZ;
1457	while (time_before(jiffies, myjif) && !ap->fw_running)
1458		cpu_relax();
1459
1460	if (!ap->fw_running) {
1461		printk(KERN_ERR "%s: Firmware NOT running!\n", ap->name);
1462
1463		ace_dump_trace(ap);
1464		writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl);
1465		readl(&regs->CpuCtrl);
1466
1467		/* aman@sgi.com - account for badly behaving firmware/NIC:
1468		 * - have observed that the NIC may continue to generate
1469		 *   interrupts for some reason; attempt to stop it - halt
1470		 *   second CPU for Tigon II cards, and also clear Mb0
1471		 * - if we're a module, we'll fail to load if this was
1472		 *   the only GbE card in the system => if the kernel does
1473		 *   see an interrupt from the NIC, code to handle it is
1474		 *   gone and OOps! - so free_irq also
1475		 */
1476		if (ap->version >= 2)
1477			writel(readl(&regs->CpuBCtrl) | CPU_HALT,
1478			       &regs->CpuBCtrl);
1479		writel(0, &regs->Mb0Lo);
1480		readl(&regs->Mb0Lo);
1481
1482		ecode = -EBUSY;
1483		goto init_error;
1484	}
1485
1486	/*
 1487	 * We load the ring here as there seems to be no way to tell the
1488	 * firmware to wipe the ring without re-initializing it.
1489	 */
1490	if (!test_and_set_bit(0, &ap->std_refill_busy))
1491		ace_load_std_rx_ring(dev, RX_RING_SIZE);
1492	else
1493		printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n",
1494		       ap->name);
1495	if (ap->version >= 2) {
1496		if (!test_and_set_bit(0, &ap->mini_refill_busy))
1497			ace_load_mini_rx_ring(dev, RX_MINI_SIZE);
1498		else
1499			printk(KERN_ERR "%s: Someone is busy refilling "
1500			       "the RX mini ring\n", ap->name);
1501	}
1502	return 0;
1503
1504 init_error:
1505	ace_init_cleanup(dev);
1506	return ecode;
1507}
1508
1509
1510static void ace_set_rxtx_parms(struct net_device *dev, int jumbo)
1511{
1512	struct ace_private *ap = netdev_priv(dev);
1513	struct ace_regs __iomem *regs = ap->regs;
1514	int board_idx = ap->board_idx;
1515
1516	if (board_idx >= 0) {
1517		if (!jumbo) {
1518			if (!tx_coal_tick[board_idx])
1519				writel(DEF_TX_COAL, &regs->TuneTxCoalTicks);
1520			if (!max_tx_desc[board_idx])
1521				writel(DEF_TX_MAX_DESC, &regs->TuneMaxTxDesc);
1522			if (!rx_coal_tick[board_idx])
1523				writel(DEF_RX_COAL, &regs->TuneRxCoalTicks);
1524			if (!max_rx_desc[board_idx])
1525				writel(DEF_RX_MAX_DESC, &regs->TuneMaxRxDesc);
1526			if (!tx_ratio[board_idx])
1527				writel(DEF_TX_RATIO, &regs->TxBufRat);
1528		} else {
1529			if (!tx_coal_tick[board_idx])
1530				writel(DEF_JUMBO_TX_COAL,
1531				       &regs->TuneTxCoalTicks);
1532			if (!max_tx_desc[board_idx])
1533				writel(DEF_JUMBO_TX_MAX_DESC,
1534				       &regs->TuneMaxTxDesc);
1535			if (!rx_coal_tick[board_idx])
1536				writel(DEF_JUMBO_RX_COAL,
1537				       &regs->TuneRxCoalTicks);
1538			if (!max_rx_desc[board_idx])
1539				writel(DEF_JUMBO_RX_MAX_DESC,
1540				       &regs->TuneMaxRxDesc);
1541			if (!tx_ratio[board_idx])
1542				writel(DEF_JUMBO_TX_RATIO, &regs->TxBufRat);
1543		}
1544	}
1545}
1546
1547
1548static void ace_watchdog(struct net_device *data)
1549{
1550	struct net_device *dev = data;
1551	struct ace_private *ap = netdev_priv(dev);
1552	struct ace_regs __iomem *regs = ap->regs;
1553
1554	/*
1555	 * We haven't received a stats update event for more than 2.5
1556	 * seconds and there is data in the transmit queue, thus we
1557	 * assume the card is stuck.
1558	 */
1559	if (*ap->tx_csm != ap->tx_ret_csm) {
1560		printk(KERN_WARNING "%s: Transmitter is stuck, %08x\n",
1561		       dev->name, (unsigned int)readl(&regs->HostCtrl));
1562		/* This can happen due to ieee flow control. */
1563	} else {
1564		printk(KERN_DEBUG "%s: BUG... transmitter died. Kicking it.\n",
1565		       dev->name);
1566#if 0
1567		netif_wake_queue(dev);
1568#endif
1569	}
1570}
1571
1572
1573static void ace_tasklet(unsigned long arg)
1574{
1575	struct net_device *dev = (struct net_device *) arg;
1576	struct ace_private *ap = netdev_priv(dev);
1577	int cur_size;
1578
1579	cur_size = atomic_read(&ap->cur_rx_bufs);
1580	if ((cur_size < RX_LOW_STD_THRES) &&
1581	    !test_and_set_bit(0, &ap->std_refill_busy)) {
1582#ifdef DEBUG
1583		printk("refilling buffers (current %i)\n", cur_size);
1584#endif
1585		ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size);
1586	}
1587
1588	if (ap->version >= 2) {
1589		cur_size = atomic_read(&ap->cur_mini_bufs);
1590		if ((cur_size < RX_LOW_MINI_THRES) &&
1591		    !test_and_set_bit(0, &ap->mini_refill_busy)) {
1592#ifdef DEBUG
1593			printk("refilling mini buffers (current %i)\n",
1594			       cur_size);
1595#endif
1596			ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size);
1597		}
1598	}
1599
1600	cur_size = atomic_read(&ap->cur_jumbo_bufs);
1601	if (ap->jumbo && (cur_size < RX_LOW_JUMBO_THRES) &&
1602	    !test_and_set_bit(0, &ap->jumbo_refill_busy)) {
1603#ifdef DEBUG
1604		printk("refilling jumbo buffers (current %i)\n", cur_size);
1605#endif
1606		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size);
1607	}
1608	ap->tasklet_pending = 0;
1609}
1610
1611
1612/*
1613 * Copy the contents of the NIC's trace buffer to kernel memory.
1614 */
1615static void ace_dump_trace(struct ace_private *ap)
1616{
1617#if 0
1618	if (!ap->trace_buf)
1619		if (!(ap->trace_buf = kmalloc(ACE_TRACE_SIZE, GFP_KERNEL)))
1620		    return;
1621#endif
1622}
1623
1624
1625/*
1626 * Load the standard rx ring.
1627 *
 1628 * Loading rings is safe without holding the spin lock since it is
 1629 * done either before the device is enabled (so no interrupts yet) or
 1630 * by the interrupt handler/tasklet, which the *_refill_busy bitops
 1630 * keep from being re-entered.
1631 */
1632static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs)
1633{
1634	struct ace_private *ap = netdev_priv(dev);
1635	struct ace_regs __iomem *regs = ap->regs;
1636	short i, idx;
1637
1638
1639	prefetchw(&ap->cur_rx_bufs);
1640
1641	idx = ap->rx_std_skbprd;
1642
1643	for (i = 0; i < nr_bufs; i++) {
1644		struct sk_buff *skb;
1645		struct rx_desc *rd;
1646		dma_addr_t mapping;
1647
1648		skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE);
1649		if (!skb)
1650			break;
1651
1652		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
1653				       offset_in_page(skb->data),
1654				       ACE_STD_BUFSIZE,
1655				       PCI_DMA_FROMDEVICE);
1656		ap->skb->rx_std_skbuff[idx].skb = skb;
1657		dma_unmap_addr_set(&ap->skb->rx_std_skbuff[idx],
1658				   mapping, mapping);
1659
1660		rd = &ap->rx_std_ring[idx];
1661		set_aceaddr(&rd->addr, mapping);
1662		rd->size = ACE_STD_BUFSIZE;
1663		rd->idx = idx;
1664		idx = (idx + 1) % RX_STD_RING_ENTRIES;
1665	}
1666
1667	if (!i)
1668		goto error_out;
1669
1670	atomic_add(i, &ap->cur_rx_bufs);
1671	ap->rx_std_skbprd = idx;
1672
1673	if (ACE_IS_TIGON_I(ap)) {
1674		struct cmd cmd;
1675		cmd.evt = C_SET_RX_PRD_IDX;
1676		cmd.code = 0;
1677		cmd.idx = ap->rx_std_skbprd;
1678		ace_issue_cmd(regs, &cmd);
1679	} else {
1680		writel(idx, &regs->RxStdPrd);
1681		wmb();
1682	}
1683
1684 out:
1685	clear_bit(0, &ap->std_refill_busy);
1686	return;
1687
1688 error_out:
1689	printk(KERN_INFO "Out of memory when allocating "
1690	       "standard receive buffers\n");
1691	goto out;
1692}
1693
1694
1695static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs)
1696{
1697	struct ace_private *ap = netdev_priv(dev);
1698	struct ace_regs __iomem *regs = ap->regs;
1699	short i, idx;
1700
1701	prefetchw(&ap->cur_mini_bufs);
1702
1703	idx = ap->rx_mini_skbprd;
1704	for (i = 0; i < nr_bufs; i++) {
1705		struct sk_buff *skb;
1706		struct rx_desc *rd;
1707		dma_addr_t mapping;
1708
1709		skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE);
1710		if (!skb)
1711			break;
1712
1713		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
1714				       offset_in_page(skb->data),
1715				       ACE_MINI_BUFSIZE,
1716				       PCI_DMA_FROMDEVICE);
1717		ap->skb->rx_mini_skbuff[idx].skb = skb;
1718		dma_unmap_addr_set(&ap->skb->rx_mini_skbuff[idx],
1719				   mapping, mapping);
1720
1721		rd = &ap->rx_mini_ring[idx];
1722		set_aceaddr(&rd->addr, mapping);
1723		rd->size = ACE_MINI_BUFSIZE;
1724		rd->idx = idx;
1725		idx = (idx + 1) % RX_MINI_RING_ENTRIES;
1726	}
1727
1728	if (!i)
1729		goto error_out;
1730
1731	atomic_add(i, &ap->cur_mini_bufs);
1732
1733	ap->rx_mini_skbprd = idx;
1734
1735	writel(idx, &regs->RxMiniPrd);
1736	wmb();
1737
1738 out:
1739	clear_bit(0, &ap->mini_refill_busy);
1740	return;
1741 error_out:
1742	printk(KERN_INFO "Out of memory when allocating "
1743	       "mini receive buffers\n");
1744	goto out;
1745}
1746
1747
1748/*
1749 * Load the jumbo rx ring, this may happen at any time if the MTU
1750 * is changed to a value > 1500.
1751 */
1752static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs)
1753{
1754	struct ace_private *ap = netdev_priv(dev);
1755	struct ace_regs __iomem *regs = ap->regs;
1756	short i, idx;
1757
1758	idx = ap->rx_jumbo_skbprd;
1759
1760	for (i = 0; i < nr_bufs; i++) {
1761		struct sk_buff *skb;
1762		struct rx_desc *rd;
1763		dma_addr_t mapping;
1764
1765		skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE);
1766		if (!skb)
1767			break;
1768
1769		mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
1770				       offset_in_page(skb->data),
1771				       ACE_JUMBO_BUFSIZE,
1772				       PCI_DMA_FROMDEVICE);
1773		ap->skb->rx_jumbo_skbuff[idx].skb = skb;
1774		dma_unmap_addr_set(&ap->skb->rx_jumbo_skbuff[idx],
1775				   mapping, mapping);
1776
1777		rd = &ap->rx_jumbo_ring[idx];
1778		set_aceaddr(&rd->addr, mapping);
1779		rd->size = ACE_JUMBO_BUFSIZE;
1780		rd->idx = idx;
1781		idx = (idx + 1) % RX_JUMBO_RING_ENTRIES;
1782	}
1783
1784	if (!i)
1785		goto error_out;
1786
1787	atomic_add(i, &ap->cur_jumbo_bufs);
1788	ap->rx_jumbo_skbprd = idx;
1789
1790	if (ACE_IS_TIGON_I(ap)) {
1791		struct cmd cmd;
1792		cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
1793		cmd.code = 0;
1794		cmd.idx = ap->rx_jumbo_skbprd;
1795		ace_issue_cmd(regs, &cmd);
1796	} else {
1797		writel(idx, &regs->RxJumboPrd);
1798		wmb();
1799	}
1800
1801 out:
1802	clear_bit(0, &ap->jumbo_refill_busy);
1803	return;
1804 error_out:
1805	if (net_ratelimit())
1806		printk(KERN_INFO "Out of memory when allocating "
1807		       "jumbo receive buffers\n");
1808	goto out;
1809}
1810
1811
1812/*
1813 * All events are considered to be slow (RX/TX ints do not generate
1814 * events) and are handled here, outside the main interrupt handler,
1815 * to reduce the size of the handler.
1816 */
1817static u32 ace_handle_event(struct net_device *dev, u32 evtcsm, u32 evtprd)
1818{
1819	struct ace_private *ap;
1820
1821	ap = netdev_priv(dev);
1822
1823	while (evtcsm != evtprd) {
1824		switch (ap->evt_ring[evtcsm].evt) {
1825		case E_FW_RUNNING:
1826			printk(KERN_INFO "%s: Firmware up and running\n",
1827			       ap->name);
1828			ap->fw_running = 1;
1829			wmb();
1830			break;
1831		case E_STATS_UPDATED:
1832			break;
1833		case E_LNK_STATE:
1834		{
1835			u16 code = ap->evt_ring[evtcsm].code;
1836			switch (code) {
1837			case E_C_LINK_UP:
1838			{
1839				u32 state = readl(&ap->regs->GigLnkState);
1840				printk(KERN_WARNING "%s: Optical link UP "
1841				       "(%s Duplex, Flow Control: %s%s)\n",
1842				       ap->name,
1843				       state & LNK_FULL_DUPLEX ? "Full":"Half",
1844				       state & LNK_TX_FLOW_CTL_Y ? "TX " : "",
1845				       state & LNK_RX_FLOW_CTL_Y ? "RX" : "");
1846				break;
1847			}
1848			case E_C_LINK_DOWN:
1849				printk(KERN_WARNING "%s: Optical link DOWN\n",
1850				       ap->name);
1851				break;
1852			case E_C_LINK_10_100:
1853				printk(KERN_WARNING "%s: 10/100BaseT link "
1854				       "UP\n", ap->name);
1855				break;
1856			default:
1857				printk(KERN_ERR "%s: Unknown optical link "
1858				       "state %02x\n", ap->name, code);
1859			}
1860			break;
1861		}
1862		case E_ERROR:
1863			switch(ap->evt_ring[evtcsm].code) {
1864			case E_C_ERR_INVAL_CMD:
1865				printk(KERN_ERR "%s: invalid command error\n",
1866				       ap->name);
1867				break;
1868			case E_C_ERR_UNIMP_CMD:
1869				printk(KERN_ERR "%s: unimplemented command "
1870				       "error\n", ap->name);
1871				break;
1872			case E_C_ERR_BAD_CFG:
1873				printk(KERN_ERR "%s: bad config error\n",
1874				       ap->name);
1875				break;
1876			default:
1877				printk(KERN_ERR "%s: unknown error %02x\n",
1878				       ap->name, ap->evt_ring[evtcsm].code);
1879			}
1880			break;
1881		case E_RESET_JUMBO_RNG:
1882		{
1883			int i;
1884			for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) {
1885				if (ap->skb->rx_jumbo_skbuff[i].skb) {
1886					ap->rx_jumbo_ring[i].size = 0;
1887					set_aceaddr(&ap->rx_jumbo_ring[i].addr, 0);
1888					dev_kfree_skb(ap->skb->rx_jumbo_skbuff[i].skb);
1889					ap->skb->rx_jumbo_skbuff[i].skb = NULL;
1890				}
1891			}
1892
1893 			if (ACE_IS_TIGON_I(ap)) {
1894 				struct cmd cmd;
1895 				cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
1896 				cmd.code = 0;
1897 				cmd.idx = 0;
1898 				ace_issue_cmd(ap->regs, &cmd);
1899 			} else {
1900 				writel(0, &((ap->regs)->RxJumboPrd));
1901 				wmb();
1902 			}
1903
1904			ap->jumbo = 0;
1905			ap->rx_jumbo_skbprd = 0;
1906			printk(KERN_INFO "%s: Jumbo ring flushed\n",
1907			       ap->name);
1908			clear_bit(0, &ap->jumbo_refill_busy);
1909			break;
1910		}
1911		default:
1912			printk(KERN_ERR "%s: Unhandled event 0x%02x\n",
1913			       ap->name, ap->evt_ring[evtcsm].evt);
1914		}
1915		evtcsm = (evtcsm + 1) % EVT_RING_ENTRIES;
1916	}
1917
1918	return evtcsm;
1919}
1920
1921
1922static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
1923{
1924	struct ace_private *ap = netdev_priv(dev);
1925	u32 idx;
1926	int mini_count = 0, std_count = 0;
1927
1928	idx = rxretcsm;
1929
1930	prefetchw(&ap->cur_rx_bufs);
1931	prefetchw(&ap->cur_mini_bufs);
1932
1933	while (idx != rxretprd) {
1934		struct ring_info *rip;
1935		struct sk_buff *skb;
1936		struct rx_desc *rxdesc, *retdesc;
1937		u32 skbidx;
1938		int bd_flags, desc_type, mapsize;
1939		u16 csum;
1940
1941
1942		/* make sure the rx descriptor isn't read before rxretprd */
1943		if (idx == rxretcsm)
1944			rmb();
1945
1946		retdesc = &ap->rx_return_ring[idx];
1947		skbidx = retdesc->idx;
1948		bd_flags = retdesc->flags;
1949		desc_type = bd_flags & (BD_FLG_JUMBO | BD_FLG_MINI);
1950
1951		switch(desc_type) {
1952			/*
1953			 * Normal frames do not have any flags set
1954			 *
1955			 * Mini and normal frames arrive frequently,
1956			 * so use a local counter to avoid doing
1957			 * atomic operations for each packet arriving.
1958			 */
1959		case 0:
1960			rip = &ap->skb->rx_std_skbuff[skbidx];
1961			mapsize = ACE_STD_BUFSIZE;
1962			rxdesc = &ap->rx_std_ring[skbidx];
1963			std_count++;
1964			break;
1965		case BD_FLG_JUMBO:
1966			rip = &ap->skb->rx_jumbo_skbuff[skbidx];
1967			mapsize = ACE_JUMBO_BUFSIZE;
1968			rxdesc = &ap->rx_jumbo_ring[skbidx];
1969			atomic_dec(&ap->cur_jumbo_bufs);
1970			break;
1971		case BD_FLG_MINI:
1972			rip = &ap->skb->rx_mini_skbuff[skbidx];
1973			mapsize = ACE_MINI_BUFSIZE;
1974			rxdesc = &ap->rx_mini_ring[skbidx];
1975			mini_count++;
1976			break;
1977		default:
1978			printk(KERN_INFO "%s: unknown frame type (0x%02x) "
1979			       "returned by NIC\n", dev->name,
1980			       retdesc->flags);
1981			goto error;
1982		}
1983
1984		skb = rip->skb;
1985		rip->skb = NULL;
1986		pci_unmap_page(ap->pdev,
1987			       dma_unmap_addr(rip, mapping),
1988			       mapsize,
1989			       PCI_DMA_FROMDEVICE);
1990		skb_put(skb, retdesc->size);
1991
1992		/*
1993		 * Fly baby, fly!
1994		 */
1995		csum = retdesc->tcp_udp_csum;
1996
1997		skb->protocol = eth_type_trans(skb, dev);
1998
1999		/*
2000		 * Instead of forcing the poor tigon mips cpu to calculate
2001		 * pseudo hdr checksum, we do this ourselves.
2002		 */
2003		if (bd_flags & BD_FLG_TCP_UDP_SUM) {
2004			skb->csum = htons(csum);
2005			skb->ip_summed = CHECKSUM_COMPLETE;
2006		} else {
2007			skb_checksum_none_assert(skb);
2008		}
2009
2010		/* send it up */
2011		if ((bd_flags & BD_FLG_VLAN_TAG))
2012			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), retdesc->vlan);
2013		netif_rx(skb);
2014
2015		dev->stats.rx_packets++;
2016		dev->stats.rx_bytes += retdesc->size;
2017
2018		idx = (idx + 1) % RX_RETURN_RING_ENTRIES;
2019	}
2020
2021	atomic_sub(std_count, &ap->cur_rx_bufs);
2022	if (!ACE_IS_TIGON_I(ap))
2023		atomic_sub(mini_count, &ap->cur_mini_bufs);
2024
2025 out:
2026	/*
2027	 * According to the documentation RxRetCsm is obsolete with
2028	 * the 12.3.x Firmware - my Tigon I NICs seem to disagree!
2029	 */
2030	if (ACE_IS_TIGON_I(ap)) {
2031		writel(idx, &ap->regs->RxRetCsm);
2032	}
2033	ap->cur_rx = idx;
2034
2035	return;
2036 error:
2037	idx = rxretprd;
2038	goto out;
2039}
2040
2041
2042static inline void ace_tx_int(struct net_device *dev,
2043			      u32 txcsm, u32 idx)
2044{
2045	struct ace_private *ap = netdev_priv(dev);
2046
2047	do {
2048		struct sk_buff *skb;
2049		struct tx_ring_info *info;
2050
2051		info = ap->skb->tx_skbuff + idx;
2052		skb = info->skb;
2053
2054		if (dma_unmap_len(info, maplen)) {
2055			pci_unmap_page(ap->pdev, dma_unmap_addr(info, mapping),
2056				       dma_unmap_len(info, maplen),
2057				       PCI_DMA_TODEVICE);
2058			dma_unmap_len_set(info, maplen, 0);
2059		}
2060
2061		if (skb) {
2062			dev->stats.tx_packets++;
2063			dev->stats.tx_bytes += skb->len;
2064			dev_kfree_skb_irq(skb);
2065			info->skb = NULL;
2066		}
2067
2068		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2069	} while (idx != txcsm);
2070
2071	if (netif_queue_stopped(dev))
2072		netif_wake_queue(dev);
2073
2074	wmb();
2075	ap->tx_ret_csm = txcsm;
2076
2077	/* So... tx_ret_csm is advanced _after_ check for device wakeup.
2078	 *
2079	 * We could try to make it before. In this case we would get
2080	 * the following race condition: hard_start_xmit on other cpu
2081	 * enters after we advanced tx_ret_csm and fills space,
2082	 * which we have just freed, so that we make illegal device wakeup.
 2083	 * There is no good way to work around this (the check at the
 2084	 * entry of ace_start_xmit detects this condition and prevents
 2085	 * ring corruption, but it is not a good workaround.)
2086	 *
2087	 * When tx_ret_csm is advanced after, we wake up device _only_
2088	 * if we really have some space in ring (though the core doing
2089	 * hard_start_xmit can see full ring for some period and has to
2090	 * synchronize.) Superb.
2091	 * BUT! We get another subtle race condition. hard_start_xmit
2092	 * may think that ring is full between wakeup and advancing
2093	 * tx_ret_csm and will stop device instantly! It is not so bad.
2094	 * We are guaranteed that there is something in ring, so that
 2095	 * the next irq will resume transmission. To speed this up we could
2096	 * mark descriptor, which closes ring with BD_FLG_COAL_NOW
2097	 * (see ace_start_xmit).
2098	 *
2099	 * Well, this dilemma exists in all lock-free devices.
2100	 * We, following scheme used in drivers by Donald Becker,
2101	 * select the least dangerous.
2102	 *							--ANK
2103	 */
2104}
2105
2106
2107static irqreturn_t ace_interrupt(int irq, void *dev_id)
2108{
2109	struct net_device *dev = (struct net_device *)dev_id;
2110	struct ace_private *ap = netdev_priv(dev);
2111	struct ace_regs __iomem *regs = ap->regs;
2112	u32 idx;
2113	u32 txcsm, rxretcsm, rxretprd;
2114	u32 evtcsm, evtprd;
2115
2116	/*
2117	 * In case of PCI shared interrupts or spurious interrupts,
2118	 * we want to make sure it is actually our interrupt before
2119	 * spending any time in here.
2120	 */
2121	if (!(readl(&regs->HostCtrl) & IN_INT))
2122		return IRQ_NONE;
2123
2124	/*
2125	 * ACK intr now. Otherwise we will lose updates to rx_ret_prd,
2126	 * which happened _after_ rxretprd = *ap->rx_ret_prd; but before
2127	 * writel(0, &regs->Mb0Lo).
2128	 *
 2129	 * The "IRQ avoidance" recommended in the docs applies to IRQs served
 2130	 * by threads, and it is wrong even for that case.
2131	 */
2132	writel(0, &regs->Mb0Lo);
2133	readl(&regs->Mb0Lo);
2134
2135	/*
2136	 * There is no conflict between transmit handling in
2137	 * start_xmit and receive processing, thus there is no reason
2138	 * to take a spin lock for RX handling. Wait until we start
2139	 * working on the other stuff - hey we don't need a spin lock
2140	 * anymore.
2141	 */
2142	rxretprd = *ap->rx_ret_prd;
2143	rxretcsm = ap->cur_rx;
2144
2145	if (rxretprd != rxretcsm)
2146		ace_rx_int(dev, rxretprd, rxretcsm);
2147
2148	txcsm = *ap->tx_csm;
2149	idx = ap->tx_ret_csm;
2150
2151	if (txcsm != idx) {
2152		/*
2153		 * If each skb takes only one descriptor this check degenerates
2154		 * to identity, because new space has just been opened.
2155		 * But if skbs are fragmented we must check that this index
 2156	 * update releases enough space, otherwise we just
 2157	 * wait for the device to complete more work.
2158		 */
2159		if (!tx_ring_full(ap, txcsm, ap->tx_prd))
2160			ace_tx_int(dev, txcsm, idx);
2161	}
2162
2163	evtcsm = readl(&regs->EvtCsm);
2164	evtprd = *ap->evt_prd;
2165
2166	if (evtcsm != evtprd) {
2167		evtcsm = ace_handle_event(dev, evtcsm, evtprd);
2168		writel(evtcsm, &regs->EvtCsm);
2169	}
2170
2171	/*
2172	 * This has to go last in the interrupt handler and run with
2173	 * the spin lock released ... what lock?
2174	 */
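	/* Refill policy, roughly: if an rx ring is merely below its "low"
	 * threshold the refill is deferred to the tasklet; once it drops
	 * below the "panic" threshold it is refilled right here in the
	 * interrupt handler, guarded by the per-ring *_refill_busy bit.
	 */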
2175	if (netif_running(dev)) {
2176		int cur_size;
2177		int run_tasklet = 0;
2178
2179		cur_size = atomic_read(&ap->cur_rx_bufs);
2180		if (cur_size < RX_LOW_STD_THRES) {
2181			if ((cur_size < RX_PANIC_STD_THRES) &&
2182			    !test_and_set_bit(0, &ap->std_refill_busy)) {
2183#ifdef DEBUG
2184				printk("low on std buffers %i\n", cur_size);
2185#endif
2186				ace_load_std_rx_ring(dev,
2187						     RX_RING_SIZE - cur_size);
2188			} else
2189				run_tasklet = 1;
2190		}
2191
2192		if (!ACE_IS_TIGON_I(ap)) {
2193			cur_size = atomic_read(&ap->cur_mini_bufs);
2194			if (cur_size < RX_LOW_MINI_THRES) {
2195				if ((cur_size < RX_PANIC_MINI_THRES) &&
2196				    !test_and_set_bit(0,
2197						      &ap->mini_refill_busy)) {
2198#ifdef DEBUG
2199					printk("low on mini buffers %i\n",
2200					       cur_size);
2201#endif
2202					ace_load_mini_rx_ring(dev,
2203							      RX_MINI_SIZE - cur_size);
2204				} else
2205					run_tasklet = 1;
2206			}
2207		}
2208
2209		if (ap->jumbo) {
2210			cur_size = atomic_read(&ap->cur_jumbo_bufs);
2211			if (cur_size < RX_LOW_JUMBO_THRES) {
2212				if ((cur_size < RX_PANIC_JUMBO_THRES) &&
2213				    !test_and_set_bit(0,
2214						      &ap->jumbo_refill_busy)){
2215#ifdef DEBUG
2216					printk("low on jumbo buffers %i\n",
2217					       cur_size);
2218#endif
2219					ace_load_jumbo_rx_ring(dev,
2220							       RX_JUMBO_SIZE - cur_size);
2221				} else
2222					run_tasklet = 1;
2223			}
2224		}
2225		if (run_tasklet && !ap->tasklet_pending) {
2226			ap->tasklet_pending = 1;
2227			tasklet_schedule(&ap->ace_tasklet);
2228		}
2229	}
2230
2231	return IRQ_HANDLED;
2232}
2233
2234static int ace_open(struct net_device *dev)
2235{
2236	struct ace_private *ap = netdev_priv(dev);
2237	struct ace_regs __iomem *regs = ap->regs;
2238	struct cmd cmd;
2239
2240	if (!(ap->fw_running)) {
2241		printk(KERN_WARNING "%s: Firmware not running!\n", dev->name);
2242		return -EBUSY;
2243	}
2244
2245	writel(dev->mtu + ETH_HLEN + 4, &regs->IfMtu);
2246
2247	cmd.evt = C_CLEAR_STATS;
2248	cmd.code = 0;
2249	cmd.idx = 0;
2250	ace_issue_cmd(regs, &cmd);
2251
2252	cmd.evt = C_HOST_STATE;
2253	cmd.code = C_C_STACK_UP;
2254	cmd.idx = 0;
2255	ace_issue_cmd(regs, &cmd);
2256
2257	if (ap->jumbo &&
2258	    !test_and_set_bit(0, &ap->jumbo_refill_busy))
2259		ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
2260
2261	if (dev->flags & IFF_PROMISC) {
2262		cmd.evt = C_SET_PROMISC_MODE;
2263		cmd.code = C_C_PROMISC_ENABLE;
2264		cmd.idx = 0;
2265		ace_issue_cmd(regs, &cmd);
2266
2267		ap->promisc = 1;
2268	} else
2269		ap->promisc = 0;
2270	ap->mcast_all = 0;
2271
2272#if 0
2273	cmd.evt = C_LNK_NEGOTIATION;
2274	cmd.code = 0;
2275	cmd.idx = 0;
2276	ace_issue_cmd(regs, &cmd);
2277#endif
2278
2279	netif_start_queue(dev);
2280
2281	/*
2282	 * Setup the bottom half rx ring refill handler
2283	 */
2284	tasklet_init(&ap->ace_tasklet, ace_tasklet, (unsigned long)dev);
2285	return 0;
2286}
2287
2288
2289static int ace_close(struct net_device *dev)
2290{
2291	struct ace_private *ap = netdev_priv(dev);
2292	struct ace_regs __iomem *regs = ap->regs;
2293	struct cmd cmd;
2294	unsigned long flags;
2295	short i;
2296
2297	/*
2298	 * Stopping the queue here makes no sense unless the irq is
2299	 * released and the hardware stopped first; otherwise the very
2300	 * next irq will simply restart it.
2301	 */
2302	netif_stop_queue(dev);
2303
2304
2305	if (ap->promisc) {
2306		cmd.evt = C_SET_PROMISC_MODE;
2307		cmd.code = C_C_PROMISC_DISABLE;
2308		cmd.idx = 0;
2309		ace_issue_cmd(regs, &cmd);
2310		ap->promisc = 0;
2311	}
2312
2313	cmd.evt = C_HOST_STATE;
2314	cmd.code = C_C_STACK_DOWN;
2315	cmd.idx = 0;
2316	ace_issue_cmd(regs, &cmd);
2317
2318	tasklet_kill(&ap->ace_tasklet);
2319
2320	/*
2321	 * Make sure one CPU is not processing packets while
2322	 * buffers are being released by another.
2323	 */
2324
2325	local_irq_save(flags);
2326	ace_mask_irq(dev);
2327
2328	for (i = 0; i < ACE_TX_RING_ENTRIES(ap); i++) {
2329		struct sk_buff *skb;
2330		struct tx_ring_info *info;
2331
2332		info = ap->skb->tx_skbuff + i;
2333		skb = info->skb;
2334
2335		if (dma_unmap_len(info, maplen)) {
2336			if (ACE_IS_TIGON_I(ap)) {
2337				/* NB: TIGON_1 is special, tx_ring is in io space */
2338				struct tx_desc __iomem *tx;
2339				tx = (__force struct tx_desc __iomem *) &ap->tx_ring[i];
2340				writel(0, &tx->addr.addrhi);
2341				writel(0, &tx->addr.addrlo);
2342				writel(0, &tx->flagsize);
2343			} else
2344				memset(ap->tx_ring + i, 0,
2345				       sizeof(struct tx_desc));
2346			dma_unmap_page(&ap->pdev->dev, dma_unmap_addr(info, mapping),
2347				       dma_unmap_len(info, maplen),
2348				       DMA_TO_DEVICE);
2349			dma_unmap_len_set(info, maplen, 0);
2350		}
2351		if (skb) {
2352			dev_kfree_skb(skb);
2353			info->skb = NULL;
2354		}
2355	}
2356
2357	if (ap->jumbo) {
2358		cmd.evt = C_RESET_JUMBO_RNG;
2359		cmd.code = 0;
2360		cmd.idx = 0;
2361		ace_issue_cmd(regs, &cmd);
2362	}
2363
2364	ace_unmask_irq(dev);
2365	local_irq_restore(flags);
2366
2367	return 0;
2368}
2369
2370
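/*
 * Map skb->data for transmit DMA and record the unmap info in the ring
 * slot at @idx. @tail is what ends up in info->skb: the skb itself when
 * this one descriptor completes the packet (so the completion path frees
 * it), NULL when fragment descriptors follow and the last fragment will
 * carry the skb instead.
 */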
2371static inline dma_addr_t
2372ace_map_tx_skb(struct ace_private *ap, struct sk_buff *skb,
2373	       struct sk_buff *tail, u32 idx)
2374{
2375	dma_addr_t mapping;
2376	struct tx_ring_info *info;
2377
2378	mapping = dma_map_page(&ap->pdev->dev, virt_to_page(skb->data),
2379			       offset_in_page(skb->data),
2380			       skb->len, DMA_TO_DEVICE);
2381
2382	info = ap->skb->tx_skbuff + idx;
2383	info->skb = tail;
2384	dma_unmap_addr_set(info, mapping, mapping);
2385	dma_unmap_len_set(info, maplen, skb->len);
2386	return mapping;
2387}
2388
2389
2390static inline void
2391ace_load_tx_bd(struct ace_private *ap, struct tx_desc *desc, u64 addr,
2392	       u32 flagsize, u32 vlan_tag)
2393{
2394#if !USE_TX_COAL_NOW
2395	flagsize &= ~BD_FLG_COAL_NOW;
2396#endif
2397
2398	if (ACE_IS_TIGON_I(ap)) {
2399		struct tx_desc __iomem *io = (__force struct tx_desc __iomem *) desc;
2400		writel(addr >> 32, &io->addr.addrhi);
2401		writel(addr & 0xffffffff, &io->addr.addrlo);
2402		writel(flagsize, &io->flagsize);
2403		writel(vlan_tag, &io->vlanres);
2404	} else {
2405		desc->addr.addrhi = addr >> 32;
2406		desc->addr.addrlo = addr;
2407		desc->flagsize = flagsize;
2408		desc->vlanres = vlan_tag;
2409	}
2410}
2411
2412
2413static netdev_tx_t ace_start_xmit(struct sk_buff *skb,
2414				  struct net_device *dev)
2415{
2416	struct ace_private *ap = netdev_priv(dev);
2417	struct ace_regs __iomem *regs = ap->regs;
2418	struct tx_desc *desc;
2419	u32 idx, flagsize;
2420	unsigned long maxjiff = jiffies + 3*HZ;
2421
2422restart:
2423	idx = ap->tx_prd;
2424
2425	if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2426		goto overflow;
2427
2428	if (!skb_shinfo(skb)->nr_frags)	{
2429		dma_addr_t mapping;
2430		u32 vlan_tag = 0;
2431
2432		mapping = ace_map_tx_skb(ap, skb, skb, idx);
2433		flagsize = (skb->len << 16) | (BD_FLG_END);
2434		if (skb->ip_summed == CHECKSUM_PARTIAL)
2435			flagsize |= BD_FLG_TCP_UDP_SUM;
2436		if (skb_vlan_tag_present(skb)) {
2437			flagsize |= BD_FLG_VLAN_TAG;
2438			vlan_tag = skb_vlan_tag_get(skb);
2439		}
2440		desc = ap->tx_ring + idx;
2441		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2442
2443		/* Look at ace_tx_int for explanations. */
2444		if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2445			flagsize |= BD_FLG_COAL_NOW;
2446
2447		ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag);
2448	} else {
2449		dma_addr_t mapping;
2450		u32 vlan_tag = 0;
2451		int i, len = 0;
2452
2453		mapping = ace_map_tx_skb(ap, skb, NULL, idx);
2454		flagsize = (skb_headlen(skb) << 16);
2455		if (skb->ip_summed == CHECKSUM_PARTIAL)
2456			flagsize |= BD_FLG_TCP_UDP_SUM;
2457		if (skb_vlan_tag_present(skb)) {
2458			flagsize |= BD_FLG_VLAN_TAG;
2459			vlan_tag = skb_vlan_tag_get(skb);
2460		}
2461
2462		ace_load_tx_bd(ap, ap->tx_ring + idx, mapping, flagsize, vlan_tag);
2463
2464		idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2465
2466		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2467			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2468			struct tx_ring_info *info;
2469
2470			len += skb_frag_size(frag);
2471			info = ap->skb->tx_skbuff + idx;
2472			desc = ap->tx_ring + idx;
2473
2474			mapping = skb_frag_dma_map(&ap->pdev->dev, frag, 0,
2475						   skb_frag_size(frag),
2476						   DMA_TO_DEVICE);
2477
2478			flagsize = skb_frag_size(frag) << 16;
2479			if (skb->ip_summed == CHECKSUM_PARTIAL)
2480				flagsize |= BD_FLG_TCP_UDP_SUM;
2481			idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
2482
2483			if (i == skb_shinfo(skb)->nr_frags - 1) {
2484				flagsize |= BD_FLG_END;
2485				if (tx_ring_full(ap, ap->tx_ret_csm, idx))
2486					flagsize |= BD_FLG_COAL_NOW;
2487
2488				/*
2489				 * Only the last fragment frees
2490				 * the skb!
2491				 */
2492				info->skb = skb;
2493			} else {
2494				info->skb = NULL;
2495			}
2496			dma_unmap_addr_set(info, mapping, mapping);
2497			dma_unmap_len_set(info, maplen, skb_frag_size(frag));
2498			ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag);
2499		}
2500	}
2501
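	/* Publish the new producer index only after all descriptor writes
	 * above: the wmb() keeps the descriptor stores ordered ahead of
	 * both the tx_prd update and the doorbell in ace_set_txprd().
	 */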
2502 	wmb();
2503 	ap->tx_prd = idx;
2504 	ace_set_txprd(regs, ap, idx);
2505
2506	if (flagsize & BD_FLG_COAL_NOW) {
2507		netif_stop_queue(dev);
2508
2509		/*
2510		 * A TX-descriptor producer (an IRQ) might have gotten
2511		 * between, making the ring free again. Since xmit is
2512		 * serialized, this is the only situation we have to
2513		 * re-test.
2514		 */
2515		if (!tx_ring_full(ap, ap->tx_ret_csm, idx))
2516			netif_wake_queue(dev);
2517	}
2518
2519	return NETDEV_TX_OK;
2520
2521overflow:
2522	/*
2523	 * This race condition is unavoidable with lock-free drivers.
2524	 * The queue is woken up _before_ tx_ret_csm is advanced, so we can
2525	 * enter hard_start_xmit too early, while the tx ring still looks
2526	 * full. This happens ~1-4 times per 100000 packets, so it is
2527	 * acceptable to spin here until the other cpu catches up. We
2528	 * probably need an additional wmb() in ace_tx_int as well.
2529	 *
2530	 * Note that this race is mitigated by reserving one more entry
2531	 * in the tx ring than is strictly necessary (see the original
2532	 * non-SG driver). With SG, however, we would need to reserve
2533	 * 2*MAX_SKB_FRAGS+1 entries, which is already overkill.
2534	 *
2535	 * The alternative is to return NETDEV_TX_BUSY without stopping
2536	 * the queue; that only makes the retry loop longer with no gain.
2537	 */
2538	if (time_before(jiffies, maxjiff)) {
2539		barrier();
2540		cpu_relax();
2541		goto restart;
2542	}
2543
2544	/* The ring is stuck full. */
2545	printk(KERN_WARNING "%s: Transmit ring stuck full\n", dev->name);
2546	return NETDEV_TX_BUSY;
2547}
2548
2549
2550static int ace_change_mtu(struct net_device *dev, int new_mtu)
2551{
2552	struct ace_private *ap = netdev_priv(dev);
2553	struct ace_regs __iomem *regs = ap->regs;
2554
2555	writel(new_mtu + ETH_HLEN + 4, &regs->IfMtu);
2556	dev->mtu = new_mtu;
2557
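	/* Crossing ACE_STD_MTU upwards turns jumbo support on and fills the
	 * jumbo ring; dropping back down waits for any refill in flight,
	 * quiesces the irq and asks the firmware to reset the jumbo ring.
	 */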
2558	if (new_mtu > ACE_STD_MTU) {
2559		if (!(ap->jumbo)) {
2560			printk(KERN_INFO "%s: Enabling Jumbo frame "
2561			       "support\n", dev->name);
2562			ap->jumbo = 1;
2563			if (!test_and_set_bit(0, &ap->jumbo_refill_busy))
2564				ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
2565			ace_set_rxtx_parms(dev, 1);
2566		}
2567	} else {
2568		while (test_and_set_bit(0, &ap->jumbo_refill_busy));
2569		ace_sync_irq(dev->irq);
2570		ace_set_rxtx_parms(dev, 0);
2571		if (ap->jumbo) {
2572			struct cmd cmd;
2573
2574			cmd.evt = C_RESET_JUMBO_RNG;
2575			cmd.code = 0;
2576			cmd.idx = 0;
2577			ace_issue_cmd(regs, &cmd);
2578		}
2579	}
2580
2581	return 0;
2582}
2583
2584static int ace_get_link_ksettings(struct net_device *dev,
2585				  struct ethtool_link_ksettings *cmd)
2586{
2587	struct ace_private *ap = netdev_priv(dev);
2588	struct ace_regs __iomem *regs = ap->regs;
2589	u32 link;
2590	u32 supported;
2591
2592	memset(cmd, 0, sizeof(struct ethtool_link_ksettings));
2593
2594	supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
2595		     SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
2596		     SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
2597		     SUPPORTED_Autoneg | SUPPORTED_FIBRE);
2598
2599	cmd->base.port = PORT_FIBRE;
2600
2601	link = readl(&regs->GigLnkState);
2602	if (link & LNK_1000MB) {
2603		cmd->base.speed = SPEED_1000;
2604	} else {
2605		link = readl(&regs->FastLnkState);
2606		if (link & LNK_100MB)
2607			cmd->base.speed = SPEED_100;
2608		else if (link & LNK_10MB)
2609			cmd->base.speed = SPEED_10;
2610		else
2611			cmd->base.speed = 0;
2612	}
2613	if (link & LNK_FULL_DUPLEX)
2614		cmd->base.duplex = DUPLEX_FULL;
2615	else
2616		cmd->base.duplex = DUPLEX_HALF;
2617
2618	if (link & LNK_NEGOTIATE)
2619		cmd->base.autoneg = AUTONEG_ENABLE;
2620	else
2621		cmd->base.autoneg = AUTONEG_DISABLE;
2622
2623#if 0
2624	/*
2625	 * Current struct ethtool_cmd is insufficient
2626	 */
2627	ecmd->trace = readl(&regs->TuneTrace);
2628
2629	ecmd->txcoal = readl(&regs->TuneTxCoalTicks);
2630	ecmd->rxcoal = readl(&regs->TuneRxCoalTicks);
2631#endif
2632
2633	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
2634						supported);
2635
2636	return 0;
2637}
2638
2639static int ace_set_link_ksettings(struct net_device *dev,
2640				  const struct ethtool_link_ksettings *cmd)
2641{
2642	struct ace_private *ap = netdev_priv(dev);
2643	struct ace_regs __iomem *regs = ap->regs;
2644	u32 link, speed;
2645
2646	link = readl(&regs->GigLnkState);
2647	if (link & LNK_1000MB)
2648		speed = SPEED_1000;
2649	else {
2650		link = readl(&regs->FastLnkState);
2651		if (link & LNK_100MB)
2652			speed = SPEED_100;
2653		else if (link & LNK_10MB)
2654			speed = SPEED_10;
2655		else
2656			speed = SPEED_100;
2657	}
2658
2659	link = LNK_ENABLE | LNK_1000MB | LNK_100MB | LNK_10MB |
2660		LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL;
2661	if (!ACE_IS_TIGON_I(ap))
2662		link |= LNK_TX_FLOW_CTL_Y;
2663	if (cmd->base.autoneg == AUTONEG_ENABLE)
2664		link |= LNK_NEGOTIATE;
2665	if (cmd->base.speed != speed) {
2666		link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB);
2667		switch (cmd->base.speed) {
2668		case SPEED_1000:
2669			link |= LNK_1000MB;
2670			break;
2671		case SPEED_100:
2672			link |= LNK_100MB;
2673			break;
2674		case SPEED_10:
2675			link |= LNK_10MB;
2676			break;
2677		}
2678	}
2679
2680	if (cmd->base.duplex == DUPLEX_FULL)
2681		link |= LNK_FULL_DUPLEX;
2682
2683	if (link != ap->link) {
2684		struct cmd cmd;
2685		printk(KERN_INFO "%s: Renegotiating link state\n",
2686		       dev->name);
2687
2688		ap->link = link;
2689		writel(link, &regs->TuneLink);
2690		if (!ACE_IS_TIGON_I(ap))
2691			writel(link, &regs->TuneFastLink);
2692		wmb();
2693
2694		cmd.evt = C_LNK_NEGOTIATION;
2695		cmd.code = 0;
2696		cmd.idx = 0;
2697		ace_issue_cmd(regs, &cmd);
2698	}
2699	return 0;
2700}
2701
2702static void ace_get_drvinfo(struct net_device *dev,
2703			    struct ethtool_drvinfo *info)
2704{
2705	struct ace_private *ap = netdev_priv(dev);
2706
2707	strscpy(info->driver, "acenic", sizeof(info->driver));
2708	snprintf(info->version, sizeof(info->version), "%i.%i.%i",
2709		 ap->firmware_major, ap->firmware_minor,
2710		 ap->firmware_fix);
2711
2712	if (ap->pdev)
2713		strscpy(info->bus_info, pci_name(ap->pdev),
2714			sizeof(info->bus_info));
2715
2716}
2717
2718/*
2719 * Set the hardware MAC address.
2720 */
2721static int ace_set_mac_addr(struct net_device *dev, void *p)
2722{
2723	struct ace_private *ap = netdev_priv(dev);
2724	struct ace_regs __iomem *regs = ap->regs;
2725	struct sockaddr *addr = p;
2726	u8 *da;
2727	struct cmd cmd;
2728
2729	if (netif_running(dev))
2730		return -EBUSY;
2731
2732	eth_hw_addr_set(dev, addr->sa_data);
2733
2734	da = (u8 *)dev->dev_addr;
2735
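	/* The station address is split across two registers: the two most
	 * significant octets go to MacAddrHi, the remaining four to
	 * MacAddrLo, most significant octet first in each.
	 */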
2736	writel(da[0] << 8 | da[1], &regs->MacAddrHi);
2737	writel((da[2] << 24) | (da[3] << 16) | (da[4] << 8) | da[5],
2738	       &regs->MacAddrLo);
2739
2740	cmd.evt = C_SET_MAC_ADDR;
2741	cmd.code = 0;
2742	cmd.idx = 0;
2743	ace_issue_cmd(regs, &cmd);
2744
2745	return 0;
2746}
2747
2748
2749static void ace_set_multicast_list(struct net_device *dev)
2750{
2751	struct ace_private *ap = netdev_priv(dev);
2752	struct ace_regs __iomem *regs = ap->regs;
2753	struct cmd cmd;
2754
2755	if ((dev->flags & IFF_ALLMULTI) && !(ap->mcast_all)) {
2756		cmd.evt = C_SET_MULTICAST_MODE;
2757		cmd.code = C_C_MCAST_ENABLE;
2758		cmd.idx = 0;
2759		ace_issue_cmd(regs, &cmd);
2760		ap->mcast_all = 1;
2761	} else if (ap->mcast_all) {
2762		cmd.evt = C_SET_MULTICAST_MODE;
2763		cmd.code = C_C_MCAST_DISABLE;
2764		cmd.idx = 0;
2765		ace_issue_cmd(regs, &cmd);
2766		ap->mcast_all = 0;
2767	}
2768
2769	if ((dev->flags & IFF_PROMISC) && !(ap->promisc)) {
2770		cmd.evt = C_SET_PROMISC_MODE;
2771		cmd.code = C_C_PROMISC_ENABLE;
2772		cmd.idx = 0;
2773		ace_issue_cmd(regs, &cmd);
2774		ap->promisc = 1;
2775	} else if (!(dev->flags & IFF_PROMISC) && (ap->promisc)) {
2776		cmd.evt = C_SET_PROMISC_MODE;
2777		cmd.code = C_C_PROMISC_DISABLE;
2778		cmd.idx = 0;
2779		ace_issue_cmd(regs, &cmd);
2780		ap->promisc = 0;
2781	}
2782
2783	/*
2784	 * For the time being multicast relies on the upper layers
2785	 * filtering it properly. The Firmware does not allow one to
2786	 * set the entire multicast list at a time and keeping track of
2787	 * it here is going to be messy.
2788	 */
2789	if (!netdev_mc_empty(dev) && !ap->mcast_all) {
2790		cmd.evt = C_SET_MULTICAST_MODE;
2791		cmd.code = C_C_MCAST_ENABLE;
2792		cmd.idx = 0;
2793		ace_issue_cmd(regs, &cmd);
2794	} else if (!ap->mcast_all) {
2795		cmd.evt = C_SET_MULTICAST_MODE;
2796		cmd.code = C_C_MCAST_DISABLE;
2797		cmd.idx = 0;
2798		ace_issue_cmd(regs, &cmd);
2799	}
2800}
2801
2802
2803static struct net_device_stats *ace_get_stats(struct net_device *dev)
2804{
2805	struct ace_private *ap = netdev_priv(dev);
2806	struct ace_mac_stats __iomem *mac_stats =
2807		(struct ace_mac_stats __iomem *)ap->regs->Stats;
2808
2809	dev->stats.rx_missed_errors = readl(&mac_stats->drop_space);
2810	dev->stats.multicast = readl(&mac_stats->kept_mc);
2811	dev->stats.collisions = readl(&mac_stats->coll);
2812
2813	return &dev->stats;
2814}
2815
2816
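/*
 * The NIC's SRAM is reached through a small sliding window in register
 * space: WinBase selects which ACE_WINDOW_SIZE-aligned chunk of SRAM the
 * Window aperture currently maps, so the copy and clear helpers below
 * split their work into pieces that never cross a window boundary.
 */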
2817static void ace_copy(struct ace_regs __iomem *regs, const __be32 *src,
2818		     u32 dest, int size)
2819{
2820	void __iomem *tdest;
2821	short tsize, i;
2822
2823	if (size <= 0)
2824		return;
2825
2826	while (size > 0) {
2827		tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
2828			    min_t(u32, size, ACE_WINDOW_SIZE));
2829		tdest = (void __iomem *) &regs->Window +
2830			(dest & (ACE_WINDOW_SIZE - 1));
2831		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
2832		for (i = 0; i < (tsize / 4); i++) {
2833			/* Firmware is big-endian */
2834			writel(be32_to_cpup(src), tdest);
2835			src++;
2836			tdest += 4;
2837			dest += 4;
2838			size -= 4;
2839		}
2840	}
2841}
2842
2843
2844static void ace_clear(struct ace_regs __iomem *regs, u32 dest, int size)
2845{
2846	void __iomem *tdest;
2847	short tsize = 0, i;
2848
2849	if (size <= 0)
2850		return;
2851
2852	while (size > 0) {
2853		tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1),
2854				min_t(u32, size, ACE_WINDOW_SIZE));
2855		tdest = (void __iomem *) &regs->Window +
2856			(dest & (ACE_WINDOW_SIZE - 1));
2857		writel(dest & ~(ACE_WINDOW_SIZE - 1), &regs->WinBase);
2858
2859		for (i = 0; i < (tsize / 4); i++) {
2860			writel(0, tdest + i*4);
2861		}
2862
2863		dest += tsize;
2864		size -= tsize;
2865	}
2866}
2867
2868
2869/*
2870 * Download the firmware into the SRAM on the NIC
2871 *
2872 * This operation requires the NIC to be halted and is performed with
2873 * interrupts disabled and with the spinlock held.
2874 */
2875static int ace_load_firmware(struct net_device *dev)
2876{
2877	const struct firmware *fw;
2878	const char *fw_name = "acenic/tg2.bin";
2879	struct ace_private *ap = netdev_priv(dev);
2880	struct ace_regs __iomem *regs = ap->regs;
2881	const __be32 *fw_data;
2882	u32 load_addr;
2883	int ret;
2884
2885	if (!(readl(&regs->CpuCtrl) & CPU_HALTED)) {
2886		printk(KERN_ERR "%s: trying to download firmware while the "
2887		       "CPU is running!\n", ap->name);
2888		return -EFAULT;
2889	}
2890
2891	if (ACE_IS_TIGON_I(ap))
2892		fw_name = "acenic/tg1.bin";
2893
2894	ret = request_firmware(&fw, fw_name, &ap->pdev->dev);
2895	if (ret) {
2896		printk(KERN_ERR "%s: Failed to load firmware \"%s\"\n",
2897		       ap->name, fw_name);
2898		return ret;
2899	}
2900
2901	fw_data = (void *)fw->data;
2902
2903	/* Firmware blob starts with version numbers, followed by
2904	   load and start address. Remainder is the blob to be loaded
2905	   contiguously from load address. We don't bother to represent
2906	   the BSS/SBSS sections any more, since we were clearing the
2907	   whole thing anyway. */
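	/* Layout of the blob as consumed below (offsets in bytes, words
	 * big-endian):
	 *
	 *   0x00  data[0..2]   firmware major / minor / fix level
	 *   0x04  fw_data[1]   start address (entry point)
	 *   0x08  fw_data[2]   load address
	 *   0x0c  fw_data[3]-  image, copied contiguously to the load address
	 */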
2908	ap->firmware_major = fw->data[0];
2909	ap->firmware_minor = fw->data[1];
2910	ap->firmware_fix = fw->data[2];
2911
2912	ap->firmware_start = be32_to_cpu(fw_data[1]);
2913	if (ap->firmware_start < 0x4000 || ap->firmware_start >= 0x80000) {
2914		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
2915		       ap->name, ap->firmware_start, fw_name);
2916		ret = -EINVAL;
2917		goto out;
2918	}
2919
2920	load_addr = be32_to_cpu(fw_data[2]);
2921	if (load_addr < 0x4000 || load_addr >= 0x80000) {
2922		printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n",
2923		       ap->name, load_addr, fw_name);
2924		ret = -EINVAL;
2925		goto out;
2926	}
2927
2928	/*
2929	 * Do not try to clear more than 512KiB or we end up seeing
2930	 * funny things on NICs with only 512KiB SRAM
2931	 */
2932	ace_clear(regs, 0x2000, 0x80000-0x2000);
2933	ace_copy(regs, &fw_data[3], load_addr, fw->size-12);
2934 out:
2935	release_firmware(fw);
2936	return ret;
2937}
2938
2939
2940/*
2941 * The eeprom on the AceNIC is an Atmel i2c EEPROM.
2942 *
2943 * Accessing the EEPROM is `interesting' to say the least - don't read
2944 * this code right after dinner.
2945 *
2946 * This is all about black magic and bit-banging the device .... I
2947 * wonder in what hospital they have put the guy who designed the i2c
2948 * specs.
2949 *
2950 * Oh yes, this is only the beginning!
2951 *
2952 * Thanks to Stevarino Webinski for helping track down the bugs in the
2953 * i2c readout code by beta testing all my hacks.
2954 */
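/*
 * Rough shape of a single byte read as implemented by read_eeprom_byte()
 * below (a sketch of the bit-banged transaction, not a datasheet quote):
 *
 *   eeprom_start()                     START condition
 *   eeprom_prep(EEPROM_WRITE_SELECT)   device address, write
 *   eeprom_prep(offset >> 8)           address high byte
 *   eeprom_prep(offset & 0xff)         address low byte
 *   eeprom_start()                     repeated START
 *   eeprom_prep(EEPROM_READ_SELECT)    device address, read
 *   clock in 8 data bits, then eeprom_stop()
 *
 * Every eeprom_prep() is followed by an eeprom_check_ack().
 */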
2955static void eeprom_start(struct ace_regs __iomem *regs)
2956{
2957	u32 local;
2958
2959	readl(&regs->LocalCtrl);
2960	udelay(ACE_SHORT_DELAY);
2961	local = readl(&regs->LocalCtrl);
2962	local |= EEPROM_DATA_OUT | EEPROM_WRITE_ENABLE;
2963	writel(local, &regs->LocalCtrl);
2964	readl(&regs->LocalCtrl);
2965	mb();
2966	udelay(ACE_SHORT_DELAY);
2967	local |= EEPROM_CLK_OUT;
2968	writel(local, &regs->LocalCtrl);
2969	readl(&regs->LocalCtrl);
2970	mb();
2971	udelay(ACE_SHORT_DELAY);
2972	local &= ~EEPROM_DATA_OUT;
2973	writel(local, &regs->LocalCtrl);
2974	readl(&regs->LocalCtrl);
2975	mb();
2976	udelay(ACE_SHORT_DELAY);
2977	local &= ~EEPROM_CLK_OUT;
2978	writel(local, &regs->LocalCtrl);
2979	readl(&regs->LocalCtrl);
2980	mb();
2981}
2982
2983
2984static void eeprom_prep(struct ace_regs __iomem *regs, u8 magic)
2985{
2986	short i;
2987	u32 local;
2988
2989	udelay(ACE_SHORT_DELAY);
2990	local = readl(&regs->LocalCtrl);
2991	local &= ~EEPROM_DATA_OUT;
2992	local |= EEPROM_WRITE_ENABLE;
2993	writel(local, &regs->LocalCtrl);
2994	readl(&regs->LocalCtrl);
2995	mb();
2996
2997	for (i = 0; i < 8; i++, magic <<= 1) {
2998		udelay(ACE_SHORT_DELAY);
2999		if (magic & 0x80)
3000			local |= EEPROM_DATA_OUT;
3001		else
3002			local &= ~EEPROM_DATA_OUT;
3003		writel(local, &regs->LocalCtrl);
3004		readl(&regs->LocalCtrl);
3005		mb();
3006
3007		udelay(ACE_SHORT_DELAY);
3008		local |= EEPROM_CLK_OUT;
3009		writel(local, &regs->LocalCtrl);
3010		readl(&regs->LocalCtrl);
3011		mb();
3012		udelay(ACE_SHORT_DELAY);
3013		local &= ~(EEPROM_CLK_OUT | EEPROM_DATA_OUT);
3014		writel(local, &regs->LocalCtrl);
3015		readl(&regs->LocalCtrl);
3016		mb();
3017	}
3018}
3019
3020
3021static int eeprom_check_ack(struct ace_regs __iomem *regs)
3022{
3023	int state;
3024	u32 local;
3025
3026	local = readl(&regs->LocalCtrl);
3027	local &= ~EEPROM_WRITE_ENABLE;
3028	writel(local, &regs->LocalCtrl);
3029	readl(&regs->LocalCtrl);
3030	mb();
3031	udelay(ACE_LONG_DELAY);
3032	local |= EEPROM_CLK_OUT;
3033	writel(local, &regs->LocalCtrl);
3034	readl(&regs->LocalCtrl);
3035	mb();
3036	udelay(ACE_SHORT_DELAY);
3037	/* sample data in middle of high clk */
3038	state = (readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0;
3039	udelay(ACE_SHORT_DELAY);
3040	mb();
3041	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
3042	readl(&regs->LocalCtrl);
3043	mb();
3044
3045	return state;
3046}
3047
3048
3049static void eeprom_stop(struct ace_regs __iomem *regs)
3050{
3051	u32 local;
3052
3053	udelay(ACE_SHORT_DELAY);
3054	local = readl(&regs->LocalCtrl);
3055	local |= EEPROM_WRITE_ENABLE;
3056	writel(local, &regs->LocalCtrl);
3057	readl(&regs->LocalCtrl);
3058	mb();
3059	udelay(ACE_SHORT_DELAY);
3060	local &= ~EEPROM_DATA_OUT;
3061	writel(local, &regs->LocalCtrl);
3062	readl(&regs->LocalCtrl);
3063	mb();
3064	udelay(ACE_SHORT_DELAY);
3065	local |= EEPROM_CLK_OUT;
3066	writel(local, &regs->LocalCtrl);
3067	readl(&regs->LocalCtrl);
3068	mb();
3069	udelay(ACE_SHORT_DELAY);
3070	local |= EEPROM_DATA_OUT;
3071	writel(local, &regs->LocalCtrl);
3072	readl(&regs->LocalCtrl);
3073	mb();
3074	udelay(ACE_LONG_DELAY);
3075	local &= ~EEPROM_CLK_OUT;
3076	writel(local, &regs->LocalCtrl);
3077	mb();
3078}
3079
3080
3081/*
3082 * Read a whole byte from the EEPROM.
3083 */
3084static int read_eeprom_byte(struct net_device *dev, unsigned long offset)
3085{
3086	struct ace_private *ap = netdev_priv(dev);
3087	struct ace_regs __iomem *regs = ap->regs;
3088	unsigned long flags;
3089	u32 local;
3090	int result = 0;
3091	short i;
3092
3093	/*
3094	 * Don't take interrupts on this CPU while bit banging
3095	 * the %#%#@$ I2C device
3096	 */
3097	local_irq_save(flags);
3098
3099	eeprom_start(regs);
3100
3101	eeprom_prep(regs, EEPROM_WRITE_SELECT);
3102	if (eeprom_check_ack(regs)) {
3103		local_irq_restore(flags);
3104		printk(KERN_ERR "%s: Unable to sync eeprom\n", ap->name);
3105		result = -EIO;
3106		goto eeprom_read_error;
3107	}
3108
3109	eeprom_prep(regs, (offset >> 8) & 0xff);
3110	if (eeprom_check_ack(regs)) {
3111		local_irq_restore(flags);
3112		printk(KERN_ERR "%s: Unable to set address byte 0\n",
3113		       ap->name);
3114		result = -EIO;
3115		goto eeprom_read_error;
3116	}
3117
3118	eeprom_prep(regs, offset & 0xff);
3119	if (eeprom_check_ack(regs)) {
3120		local_irq_restore(flags);
3121		printk(KERN_ERR "%s: Unable to set address byte 1\n",
3122		       ap->name);
3123		result = -EIO;
3124		goto eeprom_read_error;
3125	}
3126
3127	eeprom_start(regs);
3128	eeprom_prep(regs, EEPROM_READ_SELECT);
3129	if (eeprom_check_ack(regs)) {
3130		local_irq_restore(flags);
3131		printk(KERN_ERR "%s: Unable to set READ_SELECT\n",
3132		       ap->name);
3133		result = -EIO;
3134		goto eeprom_read_error;
3135	}
3136
3137	for (i = 0; i < 8; i++) {
3138		local = readl(&regs->LocalCtrl);
3139		local &= ~EEPROM_WRITE_ENABLE;
3140		writel(local, &regs->LocalCtrl);
3141		readl(&regs->LocalCtrl);
3142		udelay(ACE_LONG_DELAY);
3143		mb();
3144		local |= EEPROM_CLK_OUT;
3145		writel(local, &regs->LocalCtrl);
3146		readl(&regs->LocalCtrl);
3147		mb();
3148		udelay(ACE_SHORT_DELAY);
3149		/* sample data mid high clk */
3150		result = (result << 1) |
3151			((readl(&regs->LocalCtrl) & EEPROM_DATA_IN) != 0);
3152		udelay(ACE_SHORT_DELAY);
3153		mb();
3154		local = readl(&regs->LocalCtrl);
3155		local &= ~EEPROM_CLK_OUT;
3156		writel(local, &regs->LocalCtrl);
3157		readl(&regs->LocalCtrl);
3158		udelay(ACE_SHORT_DELAY);
3159		mb();
3160		if (i == 7) {
3161			local |= EEPROM_WRITE_ENABLE;
3162			writel(local, &regs->LocalCtrl);
3163			readl(&regs->LocalCtrl);
3164			mb();
3165			udelay(ACE_SHORT_DELAY);
3166		}
3167	}
3168
3169	local |= EEPROM_DATA_OUT;
3170	writel(local, &regs->LocalCtrl);
3171	readl(&regs->LocalCtrl);
3172	mb();
3173	udelay(ACE_SHORT_DELAY);
3174	writel(readl(&regs->LocalCtrl) | EEPROM_CLK_OUT, &regs->LocalCtrl);
3175	readl(&regs->LocalCtrl);
3176	udelay(ACE_LONG_DELAY);
3177	writel(readl(&regs->LocalCtrl) & ~EEPROM_CLK_OUT, &regs->LocalCtrl);
3178	readl(&regs->LocalCtrl);
3179	mb();
3180	udelay(ACE_SHORT_DELAY);
3181	eeprom_stop(regs);
3182
3183	local_irq_restore(flags);
3184 out:
3185	return result;
3186
3187 eeprom_read_error:
3188	printk(KERN_ERR "%s: Unable to read eeprom byte 0x%02lx\n",
3189	       ap->name, offset);
3190	goto out;
3191}
3192
3193module_pci_driver(acenic_pci_driver);