Linux Audio

Check our new training course

Linux debugging, profiling, tracing and performance analysis training

Mar 24-27, 2025, special US time zones
Register
Loading...
Note: File does not exist in v6.2.
   1/*
   2  Madge Ambassador ATM Adapter driver.
   3  Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5  This program is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published by
   7  the Free Software Foundation; either version 2 of the License, or
   8  (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful,
  11  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  GNU General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, write to the Free Software
  17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19  The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20  system and in the file COPYING in the Linux kernel source.
  21*/
  22
  23/* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25#include <linux/module.h>
  26#include <linux/types.h>
  27#include <linux/pci.h>
  28#include <linux/kernel.h>
  29#include <linux/init.h>
  30#include <linux/ioport.h>
  31#include <linux/atmdev.h>
  32#include <linux/delay.h>
  33#include <linux/interrupt.h>
  34#include <linux/poison.h>
  35#include <linux/bitrev.h>
  36#include <linux/mutex.h>
  37#include <linux/firmware.h>
  38#include <linux/ihex.h>
  39#include <linux/slab.h>
  40
  41#include <linux/atomic.h>
  42#include <asm/io.h>
  43#include <asm/byteorder.h>
  44
  45#include "ambassador.h"
  46
  47#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  48#define description_string "Madge ATM Ambassador driver"
  49#define version_string "1.2.4"
  50
  51static inline void __init show_version (void) {
  52  printk ("%s version %s\n", description_string, version_string);
  53}
  54
  55/*
  56  
  57  Theory of Operation
  58  
  59  I Hardware, detection, initialisation and shutdown.
  60  
  61  1. Supported Hardware
  62  
  63  This driver is for the PCI ATMizer-based Ambassador card (except
  64  very early versions). It is not suitable for the similar EISA "TR7"
  65  card. Commercially, both cards are known as Collage Server ATM
  66  adapters.
  67  
   68  The loader supports image transfer to the card, image start and a few
  69  other miscellaneous commands.
  70  
  71  Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  72  
  73  The cards are big-endian.
  74  
  75  2. Detection
  76  
  77  Standard PCI stuff, the early cards are detected and rejected.
  78  
  79  3. Initialisation
  80  
  81  The cards are reset and the self-test results are checked. The
  82  microcode image is then transferred and started. This waits for a
  83  pointer to a descriptor containing details of the host-based queues
  84  and buffers and various parameters etc. Once they are processed
  85  normal operations may begin. The BIA is read using a microcode
  86  command.
  87  
  88  4. Shutdown
  89  
  90  This may be accomplished either by a card reset or via the microcode
  91  shutdown command. Further investigation required.
  92  
  93  5. Persistent state
  94  
  95  The card reset does not affect PCI configuration (good) or the
  96  contents of several other "shared run-time registers" (bad) which
  97  include doorbell and interrupt control as well as EEPROM and PCI
  98  control. The driver must be careful when modifying these registers
  99  not to touch bits it does not use and to undo any changes at exit.
 100  
 101  II Driver software
 102  
 103  0. Generalities
 104  
 105  The adapter is quite intelligent (fast) and has a simple interface
 106  (few features). VPI is always zero, 1024 VCIs are supported. There
 107  is limited cell rate support. UBR channels can be capped and ABR
 108  (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 109  support.
 110  
 111  1. Driver <-> Adapter Communication
 112  
 113  Apart from the basic loader commands, the driver communicates
 114  through three entities: the command queue (CQ), the transmit queue
 115  pair (TXQ) and the receive queue pairs (RXQ). These three entities
 116  are set up by the host and passed to the microcode just after it has
 117  been started.
 118  
 119  All queues are host-based circular queues. They are contiguous and
 120  (due to hardware limitations) have some restrictions as to their
 121  locations in (bus) memory. They are of the "full means the same as
 122  empty so don't do that" variety since the adapter uses pointers
 123  internally.
 124  
 125  The queue pairs work as follows: one queue is for supply to the
 126  adapter, items in it are pending and are owned by the adapter; the
 127  other is the queue for return from the adapter, items in it have
 128  been dealt with by the adapter. The host adds items to the supply
 129  (TX descriptors and free RX buffer descriptors) and removes items
 130  from the return (TX and RX completions). The adapter deals with out
 131  of order completions.
 132  
 133  Interrupts (card to host) and the doorbell (host to card) are used
 134  for signalling.
 135  
 136  1. CQ
 137  
 138  This is to communicate "open VC", "close VC", "get stats" etc. to
 139  the adapter. At most one command is retired every millisecond by the
 140  card. There is no out of order completion or notification. The
 141  driver needs to check the return code of the command, waiting as
 142  appropriate.
 143  
 144  2. TXQ
 145  
 146  TX supply items are of variable length (scatter gather support) and
 147  so the queue items are (more or less) pointers to the real thing.
 148  Each TX supply item contains a unique, host-supplied handle (the skb
 149  bus address seems most sensible as this works for Alphas as well,
 150  there is no need to do any endian conversions on the handles).
 151  
 152  TX return items consist of just the handles above.
 153  
 154  3. RXQ (up to 4 of these with different lengths and buffer sizes)
 155  
 156  RX supply items consist of a unique, host-supplied handle (the skb
 157  bus address again) and a pointer to the buffer data area.
 158  
 159  RX return items consist of the handle above, the VC, length and a
 160  status word. This just screams "oh so easy" doesn't it?
 161
 162  Note on RX pool sizes:
 163   
 164  Each pool should have enough buffers to handle a back-to-back stream
 165  of minimum sized frames on a single VC. For example:
 166  
 167    frame spacing = 3us (about right)
 168    
 169    delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 170    
 171    min number of buffers for one VC = 1 + delay/spacing (buffers)
 172
 173    delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 174    
 175  The 20us delay assumes that there is no need to sleep; if we need to
 176  sleep to get buffers we are going to drop frames anyway.
 177  
 178  In fact, each pool should have enough buffers to support the
 179  simultaneous reassembly of a separate frame on each VC and cope with
 180  the case in which frames complete in round robin cell fashion on
 181  each VC.
 182  
 183  Only one frame can complete at each cell arrival, so if "n" VCs are
 184  open, the worst case is to have them all complete frames together
 185  followed by all starting new frames together.
 186  
 187    desired number of buffers = n + delay/spacing
 188    
 189  These are the extreme requirements, however, they are "n+k" for some
 190  "k" so we have only the constant to choose. This is the argument
  191  rx_lats which currently defaults to 7.
 192  
 193  Actually, "n ? n+k : 0" is better and this is what is implemented,
 194  subject to the limit given by the pool size.
 195  
 196  4. Driver locking
 197  
 198  Simple spinlocks are used around the TX and RX queue mechanisms.
 199  Anyone with a faster, working method is welcome to implement it.
 200  
 201  The adapter command queue is protected with a spinlock. We always
 202  wait for commands to complete.
 203  
 204  A more complex form of locking is used around parts of the VC open
 205  and close functions. There are three reasons for a lock: 1. we need
 206  to do atomic rate reservation and release (not used yet), 2. Opening
 207  sometimes involves two adapter commands which must not be separated
 208  by another command on the same VC, 3. the changes to RX pool size
 209  must be atomic. The lock needs to work over context switches, so we
 210  use a semaphore.
 211  
 212  III Hardware Features and Microcode Bugs
 213  
 214  1. Byte Ordering
 215  
 216  *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 217  
 218  2. Memory access
 219  
 220  All structures that are not accessed using DMA must be 4-byte
 221  aligned (not a problem) and must not cross 4MB boundaries.
 222  
 223  There is a DMA memory hole at E0000000-E00000FF (groan).
 224  
 225  TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 226  but for a hardware bug).
 227  
 228  RX buffers (DMA write) must not cross 16MB boundaries and must
 229  include spare trailing bytes up to the next 4-byte boundary; they
 230  will be written with rubbish.
 231  
 232  The PLX likes to prefetch; if reading up to 4 u32 past the end of
 233  each TX fragment is not a problem, then TX can be made to go a
 234  little faster by passing a flag at init that disables a prefetch
 235  workaround. We do not pass this flag. (new microcode only)
 236  
 237  Now we:
 238  . Note that alloc_skb rounds up size to a 16byte boundary.  
 239  . Ensure all areas do not traverse 4MB boundaries.
 240  . Ensure all areas do not start at a E00000xx bus address.
 241  (I cannot be certain, but this may always hold with Linux)
 242  . Make all failures cause a loud message.
 243  . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 244  . Discard non-conforming TX fragment descriptors (the TX fails).
 245  In the future we could:
 246  . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 247  . Segment TX areas into some/more fragments, when necessary.
 248  . Relax checks for non-DMA items (ignore hole).
 249  . Give scatter-gather (iovec) requirements using ???. (?)
 250  
 251  3. VC close is broken (only for new microcode)
 252  
 253  The VC close adapter microcode command fails to do anything if any
 254  frames have been received on the VC but none have been transmitted.
 255  Frames continue to be reassembled and passed (with IRQ) to the
 256  driver.
 257  
 258  IV To Do List
 259  
 260  . Fix bugs!
 261  
 262  . Timer code may be broken.
 263  
 264  . Deal with buggy VC close (somehow) in microcode 12.
 265  
 266  . Handle interrupted and/or non-blocking writes - is this a job for
 267    the protocol layer?
 268  
 269  . Add code to break up TX fragments when they span 4MB boundaries.
 270  
 271  . Add SUNI phy layer (need to know where SUNI lives on card).
 272  
 273  . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 274    leave extra headroom space for Ambassador TX descriptors.
 275  
 276  . Understand these elements of struct atm_vcc: recvq (proto?),
 277    sleep, callback, listenq, backlog_quota, reply and user_back.
 278  
 279  . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 280  
 281  . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 282  
 283  . Decide whether RX buffer recycling is or can be made completely safe;
 284    turn it back on. It looks like Werner is going to axe this.
 285  
 286  . Implement QoS changes on open VCs (involves extracting parts of VC open
 287    and close into separate functions and using them to make changes).
 288  
 289  . Hack on command queue so that someone can issue multiple commands and wait
 290    on the last one (OR only "no-op" or "wait" commands are waited for).
 291  
 292  . Eliminate need for while-schedule around do_command.
 293  
 294*/
 295
 296static void do_housekeeping (unsigned long arg);
 297/********** globals **********/
 298
 299static unsigned short debug = 0;
 300static unsigned int cmds = 8;
 301static unsigned int txs = 32;
 302static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 303static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 304static unsigned int rx_lats = 7;
 305static unsigned char pci_lat = 0;
 306
 307static const unsigned long onegigmask = -1 << 30;
 308
 309/********** access to adapter **********/
 310
 311static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 312  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 313#ifdef AMB_MMIO
 314  dev->membase[addr / sizeof(u32)] = data;
 315#else
 316  outl (data, dev->iobase + addr);
 317#endif
 318}
 319
 320static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 321#ifdef AMB_MMIO
 322  u32 data = dev->membase[addr / sizeof(u32)];
 323#else
 324  u32 data = inl (dev->iobase + addr);
 325#endif
 326  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 327  return data;
 328}
 329
 330static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 331  __be32 be = cpu_to_be32 (data);
 332  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 333#ifdef AMB_MMIO
 334  dev->membase[addr / sizeof(u32)] = be;
 335#else
 336  outl (be, dev->iobase + addr);
 337#endif
 338}
 339
 340static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 341#ifdef AMB_MMIO
 342  __be32 be = dev->membase[addr / sizeof(u32)];
 343#else
 344  __be32 be = inl (dev->iobase + addr);
 345#endif
 346  u32 data = be32_to_cpu (be);
 347  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 348  return data;
 349}
 350
 351/********** dump routines **********/
 352
 353static inline void dump_registers (const amb_dev * dev) {
 354#ifdef DEBUG_AMBASSADOR
 355  if (debug & DBG_REGS) {
 356    size_t i;
 357    PRINTD (DBG_REGS, "reading PLX control: ");
 358    for (i = 0x00; i < 0x30; i += sizeof(u32))
 359      rd_mem (dev, i);
 360    PRINTD (DBG_REGS, "reading mailboxes: ");
 361    for (i = 0x40; i < 0x60; i += sizeof(u32))
 362      rd_mem (dev, i);
 363    PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 364    for (i = 0x60; i < 0x70; i += sizeof(u32))
 365      rd_mem (dev, i);
 366  }
 367#else
 368  (void) dev;
 369#endif
 370  return;
 371}
 372
 373static inline void dump_loader_block (volatile loader_block * lb) {
 374#ifdef DEBUG_AMBASSADOR
 375  unsigned int i;
 376  PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 377	   lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 378  for (i = 0; i < MAX_COMMAND_DATA; ++i)
 379    PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 380  PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 381#else
 382  (void) lb;
 383#endif
 384  return;
 385}
 386
 387static inline void dump_command (command * cmd) {
 388#ifdef DEBUG_AMBASSADOR
 389  unsigned int i;
 390  PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 391	   cmd, /*be32_to_cpu*/ (cmd->request));
 392  for (i = 0; i < 3; ++i)
 393    PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 394  PRINTDE (DBG_CMD, "");
 395#else
 396  (void) cmd;
 397#endif
 398  return;
 399}
 400
 401static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 402#ifdef DEBUG_AMBASSADOR
 403  unsigned int i;
 404  unsigned char * data = skb->data;
 405  PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 406  for (i=0; i<skb->len && i < 256;i++)
 407    PRINTDM (DBG_DATA, "%02x ", data[i]);
 408  PRINTDE (DBG_DATA,"");
 409#else
 410  (void) prefix;
 411  (void) vc;
 412  (void) skb;
 413#endif
 414  return;
 415}
 416
 417/********** check memory areas for use by Ambassador **********/
 418
 419/* see limitations under Hardware Features */
 420
 421static int check_area (void * start, size_t length) {
 422  // assumes length > 0
 423  const u32 fourmegmask = -1 << 22;
 424  const u32 twofivesixmask = -1 << 8;
 425  const u32 starthole = 0xE0000000;
 426  u32 startaddress = virt_to_bus (start);
 427  u32 lastaddress = startaddress+length-1;
 428  if ((startaddress ^ lastaddress) & fourmegmask ||
 429      (startaddress & twofivesixmask) == starthole) {
 430    PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 431	    startaddress, lastaddress);
 432    return -1;
 433  } else {
 434    return 0;
 435  }
 436}
 437
 438/********** free an skb (as per ATM device driver documentation) **********/
 439
 440static void amb_kfree_skb (struct sk_buff * skb) {
 441  if (ATM_SKB(skb)->vcc->pop) {
 442    ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 443  } else {
 444    dev_kfree_skb_any (skb);
 445  }
 446}
 447
 448/********** TX completion **********/
 449
 450static void tx_complete (amb_dev * dev, tx_out * tx) {
 451  tx_simple * tx_descr = bus_to_virt (tx->handle);
 452  struct sk_buff * skb = tx_descr->skb;
 453  
 454  PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 455  
 456  // VC layer stats
 457  atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 458  
 459  // free the descriptor
 460  kfree (tx_descr);
 461  
 462  // free the skb
 463  amb_kfree_skb (skb);
 464  
 465  dev->stats.tx_ok++;
 466  return;
 467}
 468
 469/********** RX completion **********/
 470
 471static void rx_complete (amb_dev * dev, rx_out * rx) {
 472  struct sk_buff * skb = bus_to_virt (rx->handle);
 473  u16 vc = be16_to_cpu (rx->vc);
 474  // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 475  u16 status = be16_to_cpu (rx->status);
 476  u16 rx_len = be16_to_cpu (rx->length);
 477  
 478  PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 479  
 480  // XXX move this in and add to VC stats ???
 481  if (!status) {
 482    struct atm_vcc * atm_vcc = dev->rxer[vc];
 483    dev->stats.rx.ok++;
 484    
 485    if (atm_vcc) {
 486      
 487      if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 488	
 489	if (atm_charge (atm_vcc, skb->truesize)) {
 490	  
 491	  // prepare socket buffer
 492	  ATM_SKB(skb)->vcc = atm_vcc;
 493	  skb_put (skb, rx_len);
 494	  
 495	  dump_skb ("<<<", vc, skb);
 496	  
 497	  // VC layer stats
 498	  atomic_inc(&atm_vcc->stats->rx);
 499	  __net_timestamp(skb);
 500	  // end of our responsibility
 501	  atm_vcc->push (atm_vcc, skb);
 502	  return;
 503	  
 504	} else {
 505	  // someone fix this (message), please!
 506	  PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 507	  // drop stats incremented in atm_charge
 508	}
 509	
 510      } else {
 511      	PRINTK (KERN_INFO, "dropped over-size frame");
 512	// should we count this?
 513	atomic_inc(&atm_vcc->stats->rx_drop);
 514      }
 515      
 516    } else {
 517      PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 518      // this is an adapter bug, only in new version of microcode
 519    }
 520    
 521  } else {
 522    dev->stats.rx.error++;
 523    if (status & CRC_ERR)
 524      dev->stats.rx.badcrc++;
 525    if (status & LEN_ERR)
 526      dev->stats.rx.toolong++;
 527    if (status & ABORT_ERR)
 528      dev->stats.rx.aborted++;
 529    if (status & UNUSED_ERR)
 530      dev->stats.rx.unused++;
 531  }
 532  
 533  dev_kfree_skb_any (skb);
 534  return;
 535}
 536
 537/*
 538  
 539  Note on queue handling.
 540  
 541  Here "give" and "take" refer to queue entries and a queue (pair)
 542  rather than frames to or from the host or adapter. Empty frame
 543  buffers are given to the RX queue pair and returned unused or
 544  containing RX frames. TX frames (well, pointers to TX fragment
 545  lists) are given to the TX queue pair, completions are returned.
 546  
 547*/
 548
 549/********** command queue **********/
 550
 551// I really don't like this, but it's the best I can do at the moment
 552
 553// also, the callers are responsible for byte order as the microcode
 554// sometimes does 16-bit accesses (yuk yuk yuk)
 555
 556static int command_do (amb_dev * dev, command * cmd) {
 557  amb_cq * cq = &dev->cq;
 558  volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 559  command * my_slot;
 560  
 561  PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 562  
 563  if (test_bit (dead, &dev->flags))
 564    return 0;
 565  
 566  spin_lock (&cq->lock);
 567  
 568  // if not full...
 569  if (cq->pending < cq->maximum) {
 570    // remember my slot for later
 571    my_slot = ptrs->in;
 572    PRINTD (DBG_CMD, "command in slot %p", my_slot);
 573    
 574    dump_command (cmd);
 575    
 576    // copy command in
 577    *ptrs->in = *cmd;
 578    cq->pending++;
 579    ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 580    
 581    // mail the command
 582    wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 583    
 584    if (cq->pending > cq->high)
 585      cq->high = cq->pending;
 586    spin_unlock (&cq->lock);
 587    
 588    // these comments were in a while-loop before, msleep removes the loop
 589    // go to sleep
 590    // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 591    msleep(cq->pending);
 592    
 593    // wait for my slot to be reached (all waiters are here or above, until...)
 594    while (ptrs->out != my_slot) {
 595      PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 596      set_current_state(TASK_UNINTERRUPTIBLE);
 597      schedule();
 598    }
 599    
 600    // wait on my slot (... one gets to its slot, and... )
 601    while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 602      PRINTD (DBG_CMD, "wait: command slot completion");
 603      set_current_state(TASK_UNINTERRUPTIBLE);
 604      schedule();
 605    }
 606    
 607    PRINTD (DBG_CMD, "command complete");
 608    // update queue (... moves the queue along to the next slot)
 609    spin_lock (&cq->lock);
 610    cq->pending--;
 611    // copy command out
 612    *cmd = *ptrs->out;
 613    ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 614    spin_unlock (&cq->lock);
 615    
 616    return 0;
 617  } else {
 618    cq->filled++;
 619    spin_unlock (&cq->lock);
 620    return -EAGAIN;
 621  }
 622  
 623}
 624
 625/********** TX queue pair **********/
 626
 627static int tx_give (amb_dev * dev, tx_in * tx) {
 628  amb_txq * txq = &dev->txq;
 629  unsigned long flags;
 630  
 631  PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 632
 633  if (test_bit (dead, &dev->flags))
 634    return 0;
 635  
 636  spin_lock_irqsave (&txq->lock, flags);
 637  
 638  if (txq->pending < txq->maximum) {
 639    PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 640
 641    *txq->in.ptr = *tx;
 642    txq->pending++;
 643    txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 644    // hand over the TX and ring the bell
 645    wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 646    wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 647    
 648    if (txq->pending > txq->high)
 649      txq->high = txq->pending;
 650    spin_unlock_irqrestore (&txq->lock, flags);
 651    return 0;
 652  } else {
 653    txq->filled++;
 654    spin_unlock_irqrestore (&txq->lock, flags);
 655    return -EAGAIN;
 656  }
 657}
 658
 659static int tx_take (amb_dev * dev) {
 660  amb_txq * txq = &dev->txq;
 661  unsigned long flags;
 662  
 663  PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 664  
 665  spin_lock_irqsave (&txq->lock, flags);
 666  
 667  if (txq->pending && txq->out.ptr->handle) {
 668    // deal with TX completion
 669    tx_complete (dev, txq->out.ptr);
 670    // mark unused again
 671    txq->out.ptr->handle = 0;
 672    // remove item
 673    txq->pending--;
 674    txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 675    
 676    spin_unlock_irqrestore (&txq->lock, flags);
 677    return 0;
 678  } else {
 679    
 680    spin_unlock_irqrestore (&txq->lock, flags);
 681    return -1;
 682  }
 683}
 684
 685/********** RX queue pairs **********/
 686
 687static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 688  amb_rxq * rxq = &dev->rxq[pool];
 689  unsigned long flags;
 690  
 691  PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 692  
 693  spin_lock_irqsave (&rxq->lock, flags);
 694  
 695  if (rxq->pending < rxq->maximum) {
 696    PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 697
 698    *rxq->in.ptr = *rx;
 699    rxq->pending++;
 700    rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 701    // hand over the RX buffer
 702    wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 703    
 704    spin_unlock_irqrestore (&rxq->lock, flags);
 705    return 0;
 706  } else {
 707    spin_unlock_irqrestore (&rxq->lock, flags);
 708    return -1;
 709  }
 710}
 711
 712static int rx_take (amb_dev * dev, unsigned char pool) {
 713  amb_rxq * rxq = &dev->rxq[pool];
 714  unsigned long flags;
 715  
 716  PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 717  
 718  spin_lock_irqsave (&rxq->lock, flags);
 719  
 720  if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 721    // deal with RX completion
 722    rx_complete (dev, rxq->out.ptr);
 723    // mark unused again
 724    rxq->out.ptr->status = 0;
 725    rxq->out.ptr->length = 0;
 726    // remove item
 727    rxq->pending--;
 728    rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 729    
 730    if (rxq->pending < rxq->low)
 731      rxq->low = rxq->pending;
 732    spin_unlock_irqrestore (&rxq->lock, flags);
 733    return 0;
 734  } else {
 735    if (!rxq->pending && rxq->buffers_wanted)
 736      rxq->emptied++;
 737    spin_unlock_irqrestore (&rxq->lock, flags);
 738    return -1;
 739  }
 740}
 741
 742/********** RX Pool handling **********/
 743
 744/* pre: buffers_wanted = 0, post: pending = 0 */
 745static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 746  amb_rxq * rxq = &dev->rxq[pool];
 747  
 748  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 749  
 750  if (test_bit (dead, &dev->flags))
 751    return;
 752  
 753  /* we are not quite like the fill pool routines as we cannot just
 754     remove one buffer, we have to remove all of them, but we might as
 755     well pretend... */
 756  if (rxq->pending > rxq->buffers_wanted) {
 757    command cmd;
 758    cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 759    cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 760    while (command_do (dev, &cmd))
 761      schedule();
 762    /* the pool may also be emptied via the interrupt handler */
 763    while (rxq->pending > rxq->buffers_wanted)
 764      if (rx_take (dev, pool))
 765	schedule();
 766  }
 767  
 768  return;
 769}
 770
 771static void drain_rx_pools (amb_dev * dev) {
 772  unsigned char pool;
 773  
 774  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 775  
 776  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 777    drain_rx_pool (dev, pool);
 778}
 779
 780static void fill_rx_pool (amb_dev * dev, unsigned char pool,
 781                                 gfp_t priority)
 782{
 783  rx_in rx;
 784  amb_rxq * rxq;
 785  
 786  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 787  
 788  if (test_bit (dead, &dev->flags))
 789    return;
 790  
 791  rxq = &dev->rxq[pool];
 792  while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 793    
 794    struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 795    if (!skb) {
 796      PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 797      return;
 798    }
 799    if (check_area (skb->data, skb->truesize)) {
 800      dev_kfree_skb_any (skb);
 801      return;
 802    }
 803    // cast needed as there is no %? for pointer differences
 804    PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 805	    skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
 806    rx.handle = virt_to_bus (skb);
 807    rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 808    if (rx_give (dev, &rx, pool))
 809      dev_kfree_skb_any (skb);
 810    
 811  }
 812  
 813  return;
 814}
 815
 816// top up all RX pools
 817static void fill_rx_pools (amb_dev * dev) {
 818  unsigned char pool;
 819  
 820  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 821  
 822  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 823    fill_rx_pool (dev, pool, GFP_ATOMIC);
 824  
 825  return;
 826}
 827
 828/********** enable host interrupts **********/
 829
 830static void interrupts_on (amb_dev * dev) {
 831  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 832	    rd_plain (dev, offsetof(amb_mem, interrupt_control))
 833	    | AMB_INTERRUPT_BITS);
 834}
 835
 836/********** disable host interrupts **********/
 837
 838static void interrupts_off (amb_dev * dev) {
 839  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 840	    rd_plain (dev, offsetof(amb_mem, interrupt_control))
 841	    &~ AMB_INTERRUPT_BITS);
 842}
 843
 844/********** interrupt handling **********/
 845
 846static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 847  amb_dev * dev = dev_id;
 848  
 849  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 850  
 851  {
 852    u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 853  
 854    // for us or someone else sharing the same interrupt
 855    if (!interrupt) {
 856      PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 857      return IRQ_NONE;
 858    }
 859    
 860    // definitely for us
 861    PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 862    wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 863  }
 864  
 865  {
 866    unsigned int irq_work = 0;
 867    unsigned char pool;
 868    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 869      while (!rx_take (dev, pool))
 870	++irq_work;
 871    while (!tx_take (dev))
 872      ++irq_work;
 873  
 874    if (irq_work) {
 875      fill_rx_pools (dev);
 876
 877      PRINTD (DBG_IRQ, "work done: %u", irq_work);
 878    } else {
 879      PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 880    }
 881  }
 882  
 883  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 884  return IRQ_HANDLED;
 885}
 886
 887/********** make rate (not quite as much fun as Horizon) **********/
 888
 889static int make_rate (unsigned int rate, rounding r,
 890		      u16 * bits, unsigned int * actual) {
 891  unsigned char exp = -1; // hush gcc
 892  unsigned int man = -1;  // hush gcc
 893  
 894  PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 895  
 896  // rates in cells per second, ITU format (nasty 16-bit floating-point)
 897  // given 5-bit e and 9-bit m:
 898  // rate = EITHER (1+m/2^9)*2^e    OR 0
 899  // bits = EITHER 1<<14 | e<<9 | m OR 0
 900  // (bit 15 is "reserved", bit 14 "non-zero")
 901  // smallest rate is 0 (special representation)
 902  // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 903  // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 904  // simple algorithm:
 905  // find position of top bit, this gives e
 906  // remove top bit and shift (rounding if feeling clever) by 9-e
 907  
 908  // ucode bug: please don't set bit 14! so 0 rate not representable
 909  
 910  if (rate > 0xffc00000U) {
 911    // larger than largest representable rate
 912    
 913    if (r == round_up) {
 914	return -EINVAL;
 915    } else {
 916      exp = 31;
 917      man = 511;
 918    }
 919    
 920  } else if (rate) {
 921    // representable rate
 922    
 923    exp = 31;
 924    man = rate;
 925    
 926    // invariant: rate = man*2^(exp-31)
 927    while (!(man & (1<<31))) {
 928      exp = exp - 1;
 929      man = man<<1;
 930    }
 931    
 932    // man has top bit set
 933    // rate = (2^31+(man-2^31))*2^(exp-31)
 934    // rate = (1+(man-2^31)/2^31)*2^exp
 935    man = man<<1;
 936    man &= 0xffffffffU; // a nop on 32-bit systems
 937    // rate = (1+man/2^32)*2^exp
 938    
 939    // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 940    // time to lose significance... we want m in the range 0 to 2^9-1
 941    // rounding presents a minor problem... we first decide which way
 942    // we are rounding (based on given rounding direction and possibly
 943    // the bits of the mantissa that are to be discarded).
 944    
 945    switch (r) {
 946      case round_down: {
 947	// just truncate
 948	man = man>>(32-9);
 949	break;
 950      }
 951      case round_up: {
 952	// check all bits that we are discarding
 953	if (man & (~0U>>9)) {
 954	  man = (man>>(32-9)) + 1;
 955	  if (man == (1<<9)) {
 956	    // no need to check for round up outside of range
 957	    man = 0;
 958	    exp += 1;
 959	  }
 960	} else {
 961	  man = (man>>(32-9));
 962	}
 963	break;
 964      }
 965      case round_nearest: {
 966	// check msb that we are discarding
 967	if (man & (1<<(32-9-1))) {
 968	  man = (man>>(32-9)) + 1;
 969	  if (man == (1<<9)) {
 970	    // no need to check for round up outside of range
 971	    man = 0;
 972	    exp += 1;
 973	  }
 974	} else {
 975	  man = (man>>(32-9));
 976	}
 977	break;
 978      }
 979    }
 980    
 981  } else {
 982    // zero rate - not representable
 983    
 984    if (r == round_down) {
 985      return -EINVAL;
 986    } else {
 987      exp = 0;
 988      man = 0;
 989    }
 990    
 991  }
 992  
 993  PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
 994  
 995  if (bits)
 996    *bits = /* (1<<14) | */ (exp<<9) | man;
 997  
 998  if (actual)
 999    *actual = (exp >= 9)
1000      ? (1 << exp) + (man << (exp-9))
1001      : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1002  
1003  return 0;
1004}
1005
1006/********** Linux ATM Operations **********/
1007
1008// some are not yet implemented while others do not make sense for
1009// this device
1010
1011/********** Open a VC **********/
1012
1013static int amb_open (struct atm_vcc * atm_vcc)
1014{
1015  int error;
1016  
1017  struct atm_qos * qos;
1018  struct atm_trafprm * txtp;
1019  struct atm_trafprm * rxtp;
1020  u16 tx_rate_bits = -1; // hush gcc
1021  u16 tx_vc_bits = -1; // hush gcc
1022  u16 tx_frame_bits = -1; // hush gcc
1023  
1024  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1025  amb_vcc * vcc;
1026  unsigned char pool = -1; // hush gcc
1027  short vpi = atm_vcc->vpi;
1028  int vci = atm_vcc->vci;
1029  
1030  PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1031  
1032#ifdef ATM_VPI_UNSPEC
1033  // UNSPEC is deprecated, remove this code eventually
1034  if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1035    PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1036    return -EINVAL;
1037  }
1038#endif
1039  
1040  if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1041	0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1042    PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1043    return -EINVAL;
1044  }
1045  
1046  qos = &atm_vcc->qos;
1047  
1048  if (qos->aal != ATM_AAL5) {
1049    PRINTD (DBG_QOS, "AAL not supported");
1050    return -EINVAL;
1051  }
1052  
1053  // traffic parameters
1054  
1055  PRINTD (DBG_QOS, "TX:");
1056  txtp = &qos->txtp;
1057  if (txtp->traffic_class != ATM_NONE) {
1058    switch (txtp->traffic_class) {
1059      case ATM_UBR: {
1060	// we take "the PCR" as a rate-cap
1061	int pcr = atm_pcr_goal (txtp);
1062	if (!pcr) {
1063	  // no rate cap
1064	  tx_rate_bits = 0;
1065	  tx_vc_bits = TX_UBR;
1066	  tx_frame_bits = TX_FRAME_NOTCAP;
1067	} else {
1068	  rounding r;
1069	  if (pcr < 0) {
1070	    r = round_down;
1071	    pcr = -pcr;
1072	  } else {
1073	    r = round_up;
1074	  }
1075	  error = make_rate (pcr, r, &tx_rate_bits, NULL);
1076	  if (error)
1077	    return error;
1078	  tx_vc_bits = TX_UBR_CAPPED;
1079	  tx_frame_bits = TX_FRAME_CAPPED;
1080	}
1081	break;
1082      }
1083#if 0
1084      case ATM_ABR: {
1085	pcr = atm_pcr_goal (txtp);
1086	PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1087	break;
1088      }
1089#endif
1090      default: {
1091	// PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1092	PRINTD (DBG_QOS, "request for non-UBR denied");
1093	return -EINVAL;
1094      }
1095    }
1096    PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1097	    tx_rate_bits, tx_vc_bits);
1098  }
1099  
1100  PRINTD (DBG_QOS, "RX:");
1101  rxtp = &qos->rxtp;
1102  if (rxtp->traffic_class == ATM_NONE) {
1103    // do nothing
1104  } else {
1105    // choose an RX pool (arranged in increasing size)
1106    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1107      if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1108	PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1109		pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1110	break;
1111      }
1112    if (pool == NUM_RX_POOLS) {
1113      PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1114	      "no pool suitable for VC (RX max_sdu %d is too large)",
1115	      rxtp->max_sdu);
1116      return -EINVAL;
1117    }
1118    
1119    switch (rxtp->traffic_class) {
1120      case ATM_UBR: {
1121	break;
1122      }
1123#if 0
1124      case ATM_ABR: {
1125	pcr = atm_pcr_goal (rxtp);
1126	PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1127	break;
1128      }
1129#endif
1130      default: {
1131	// PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1132	PRINTD (DBG_QOS, "request for non-UBR denied");
1133	return -EINVAL;
1134      }
1135    }
1136  }
1137  
1138  // get space for our vcc stuff
1139  vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1140  if (!vcc) {
1141    PRINTK (KERN_ERR, "out of memory!");
1142    return -ENOMEM;
1143  }
1144  atm_vcc->dev_data = (void *) vcc;
1145  
1146  // no failures beyond this point
1147  
1148  // we are not really "immediately before allocating the connection
1149  // identifier in hardware", but it will just have to do!
1150  set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1151  
1152  if (txtp->traffic_class != ATM_NONE) {
1153    command cmd;
1154    
1155    vcc->tx_frame_bits = tx_frame_bits;
1156    
1157    mutex_lock(&dev->vcc_sf);
1158    if (dev->rxer[vci]) {
1159      // RXer on the channel already, just modify rate...
1160      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1161      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1162      cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1163      while (command_do (dev, &cmd))
1164	schedule();
1165      // ... and TX flags, preserving the RX pool
1166      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1167      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1168      cmd.args.modify_flags.flags = cpu_to_be32
1169	( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1170	  | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1171      while (command_do (dev, &cmd))
1172	schedule();
1173    } else {
1174      // no RXer on the channel, just open (with pool zero)
1175      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1176      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1177      cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1178      cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1179      while (command_do (dev, &cmd))
1180	schedule();
1181    }
1182    dev->txer[vci].tx_present = 1;
1183    mutex_unlock(&dev->vcc_sf);
1184  }
1185  
1186  if (rxtp->traffic_class != ATM_NONE) {
1187    command cmd;
1188    
1189    vcc->rx_info.pool = pool;
1190    
1191    mutex_lock(&dev->vcc_sf);
1192    /* grow RX buffer pool */
1193    if (!dev->rxq[pool].buffers_wanted)
1194      dev->rxq[pool].buffers_wanted = rx_lats;
1195    dev->rxq[pool].buffers_wanted += 1;
1196    fill_rx_pool (dev, pool, GFP_KERNEL);
1197    
1198    if (dev->txer[vci].tx_present) {
1199      // TXer on the channel already
1200      // switch (from pool zero) to this pool, preserving the TX bits
1201      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1202      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1203      cmd.args.modify_flags.flags = cpu_to_be32
1204	( (pool << SRB_POOL_SHIFT)
1205	  | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1206    } else {
1207      // no TXer on the channel, open the VC (with no rate info)
1208      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1209      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1210      cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1211      cmd.args.open.rate = cpu_to_be32 (0);
1212    }
1213    while (command_do (dev, &cmd))
1214      schedule();
1215    // this link allows RX frames through
1216    dev->rxer[vci] = atm_vcc;
1217    mutex_unlock(&dev->vcc_sf);
1218  }
1219  
1220  // indicate readiness
1221  set_bit(ATM_VF_READY,&atm_vcc->flags);
1222  
1223  return 0;
1224}
1225
1226/********** Close a VC **********/
1227
1228static void amb_close (struct atm_vcc * atm_vcc) {
1229  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1230  amb_vcc * vcc = AMB_VCC (atm_vcc);
1231  u16 vci = atm_vcc->vci;
1232  
1233  PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1234  
1235  // indicate unreadiness
1236  clear_bit(ATM_VF_READY,&atm_vcc->flags);
1237  
1238  // disable TXing
1239  if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1240    command cmd;
1241    
1242    mutex_lock(&dev->vcc_sf);
1243    if (dev->rxer[vci]) {
1244      // RXer still on the channel, just modify rate... XXX not really needed
1245      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1246      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1247      cmd.args.modify_rate.rate = cpu_to_be32 (0);
1248      // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1249    } else {
1250      // no RXer on the channel, close channel
1251      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1252      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1253    }
1254    dev->txer[vci].tx_present = 0;
1255    while (command_do (dev, &cmd))
1256      schedule();
1257    mutex_unlock(&dev->vcc_sf);
1258  }
1259  
1260  // disable RXing
1261  if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1262    command cmd;
1263    
1264    // this is (the?) one reason why we need the amb_vcc struct
1265    unsigned char pool = vcc->rx_info.pool;
1266    
1267    mutex_lock(&dev->vcc_sf);
1268    if (dev->txer[vci].tx_present) {
1269      // TXer still on the channel, just go to pool zero XXX not really needed
1270      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1271      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1272      cmd.args.modify_flags.flags = cpu_to_be32
1273	(dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1274    } else {
1275      // no TXer on the channel, close the VC
1276      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1277      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1278    }
1279    // forget the rxer - no more skbs will be pushed
1280    if (atm_vcc != dev->rxer[vci])
1281      PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1282	      "arghhh! we're going to die!",
1283	      vcc, dev->rxer[vci]);
1284    dev->rxer[vci] = NULL;
1285    while (command_do (dev, &cmd))
1286      schedule();
1287    
1288    /* shrink RX buffer pool */
1289    dev->rxq[pool].buffers_wanted -= 1;
1290    if (dev->rxq[pool].buffers_wanted == rx_lats) {
1291      dev->rxq[pool].buffers_wanted = 0;
1292      drain_rx_pool (dev, pool);
1293    }
1294    mutex_unlock(&dev->vcc_sf);
1295  }
1296  
1297  // free our structure
1298  kfree (vcc);
1299  
1300  // say the VPI/VCI is free again
1301  clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1302
1303  return;
1304}
1305
1306/********** Send **********/
1307
1308static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1309  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1310  amb_vcc * vcc = AMB_VCC(atm_vcc);
1311  u16 vc = atm_vcc->vci;
1312  unsigned int tx_len = skb->len;
1313  unsigned char * tx_data = skb->data;
1314  tx_simple * tx_descr;
1315  tx_in tx;
1316  
1317  if (test_bit (dead, &dev->flags))
1318    return -EIO;
1319  
1320  PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1321	  vc, tx_data, tx_len);
1322  
1323  dump_skb (">>>", vc, skb);
1324  
1325  if (!dev->txer[vc].tx_present) {
1326    PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1327    return -EBADFD;
1328  }
1329  
1330  // this is a driver private field so we have to set it ourselves,
1331  // despite the fact that we are _required_ to use it to check for a
1332  // pop function
1333  ATM_SKB(skb)->vcc = atm_vcc;
1334  
1335  if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1336    PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1337    return -EIO;
1338  }
1339  
1340  if (check_area (skb->data, skb->len)) {
1341    atomic_inc(&atm_vcc->stats->tx_err);
1342    return -ENOMEM; // ?
1343  }
1344  
1345  // allocate memory for fragments
1346  tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1347  if (!tx_descr) {
1348    PRINTK (KERN_ERR, "could not allocate TX descriptor");
1349    return -ENOMEM;
1350  }
1351  if (check_area (tx_descr, sizeof(tx_simple))) {
1352    kfree (tx_descr);
1353    return -ENOMEM;
1354  }
1355  PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1356  
1357  tx_descr->skb = skb;
1358  
1359  tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1360  tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1361  
1362  tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1363  tx_descr->tx_frag_end.vc = 0;
1364  tx_descr->tx_frag_end.next_descriptor_length = 0;
1365  tx_descr->tx_frag_end.next_descriptor = 0;
1366#ifdef AMB_NEW_MICROCODE
1367  tx_descr->tx_frag_end.cpcs_uu = 0;
1368  tx_descr->tx_frag_end.cpi = 0;
1369  tx_descr->tx_frag_end.pad = 0;
1370#endif
1371  
1372  tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1373  tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1374  tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1375  
1376  while (tx_give (dev, &tx))
1377    schedule();
1378  return 0;
1379}
1380
1381/********** Change QoS on a VC **********/
1382
1383// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1384
1385/********** Free RX Socket Buffer **********/
1386
1387#if 0
1388static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1389  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1390  amb_vcc * vcc = AMB_VCC (atm_vcc);
1391  unsigned char pool = vcc->rx_info.pool;
1392  rx_in rx;
1393  
1394  // This may be unsafe for various reasons that I cannot really guess
1395  // at. However, I note that the ATM layer calls kfree_skb rather
1396  // than dev_kfree_skb at this point so we are least covered as far
1397  // as buffer locking goes. There may be bugs if pcap clones RX skbs.
1398
1399  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
1400	  skb, atm_vcc, vcc);
1401  
1402  rx.handle = virt_to_bus (skb);
1403  rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
1404  
1405  skb->data = skb->head;
1406  skb->tail = skb->head;
1407  skb->len = 0;
1408  
1409  if (!rx_give (dev, &rx, pool)) {
1410    // success
1411    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
1412    return;
1413  }
1414  
1415  // just do what the ATM layer would have done
1416  dev_kfree_skb_any (skb);
1417  
1418  return;
1419}
1420#endif
1421
1422/********** Proc File Output **********/
1423
1424static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1425  amb_dev * dev = AMB_DEV (atm_dev);
1426  int left = *pos;
1427  unsigned char pool;
1428  
1429  PRINTD (DBG_FLOW, "amb_proc_read");
1430  
1431  /* more diagnostics here? */
1432  
1433  if (!left--) {
1434    amb_stats * s = &dev->stats;
1435    return sprintf (page,
1436		    "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1437		    "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1438		    s->tx_ok, s->rx.ok, s->rx.error,
1439		    s->rx.badcrc, s->rx.toolong,
1440		    s->rx.aborted, s->rx.unused);
1441  }
1442  
1443  if (!left--) {
1444    amb_cq * c = &dev->cq;
1445    return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1446		    c->pending, c->high, c->maximum);
1447  }
1448  
1449  if (!left--) {
1450    amb_txq * t = &dev->txq;
1451    return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1452		    t->pending, t->maximum, t->high, t->filled);
1453  }
1454  
1455  if (!left--) {
1456    unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1457    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1458      amb_rxq * r = &dev->rxq[pool];
1459      count += sprintf (page+count, " %u/%u/%u %u %u",
1460			r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1461    }
1462    count += sprintf (page+count, ".\n");
1463    return count;
1464  }
1465  
1466  if (!left--) {
1467    unsigned int count = sprintf (page, "RX buffer sizes:");
1468    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1469      amb_rxq * r = &dev->rxq[pool];
1470      count += sprintf (page+count, " %u", r->buffer_size);
1471    }
1472    count += sprintf (page+count, ".\n");
1473    return count;
1474  }
1475  
1476#if 0
1477  if (!left--) {
1478    // suni block etc?
1479  }
1480#endif
1481  
1482  return 0;
1483}
1484
1485/********** Operation Structure **********/
1486
1487static const struct atmdev_ops amb_ops = {
1488  .open         = amb_open,
1489  .close	= amb_close,
1490  .send         = amb_send,
1491  .proc_read	= amb_proc_read,
1492  .owner	= THIS_MODULE,
1493};
1494
1495/********** housekeeping **********/
1496static void do_housekeeping (unsigned long arg) {
1497  amb_dev * dev = (amb_dev *) arg;
1498  
1499  // could collect device-specific (not driver/atm-linux) stats here
1500      
1501  // last resort refill once every ten seconds
1502  fill_rx_pools (dev);
1503  mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1504  
1505  return;
1506}
1507
1508/********** creation of communication queues **********/
1509
1510static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
1511				 unsigned int txs, unsigned int * rxs,
1512				 unsigned int * rx_buffer_sizes) {
1513  unsigned char pool;
1514  size_t total = 0;
1515  void * memory;
1516  void * limit;
1517  
1518  PRINTD (DBG_FLOW, "create_queues %p", dev);
1519  
1520  total += cmds * sizeof(command);
1521  
1522  total += txs * (sizeof(tx_in) + sizeof(tx_out));
1523  
1524  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1525    total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1526  
1527  memory = kmalloc (total, GFP_KERNEL);
1528  if (!memory) {
1529    PRINTK (KERN_ERR, "could not allocate queues");
1530    return -ENOMEM;
1531  }
1532  if (check_area (memory, total)) {
1533    PRINTK (KERN_ERR, "queues allocated in nasty area");
1534    kfree (memory);
1535    return -ENOMEM;
1536  }
1537  
1538  limit = memory + total;
1539  PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1540  
1541  PRINTD (DBG_CMD, "command queue at %p", memory);
1542  
1543  {
1544    command * cmd = memory;
1545    amb_cq * cq = &dev->cq;
1546    
1547    cq->pending = 0;
1548    cq->high = 0;
1549    cq->maximum = cmds - 1;
1550    
1551    cq->ptrs.start = cmd;
1552    cq->ptrs.in = cmd;
1553    cq->ptrs.out = cmd;
1554    cq->ptrs.limit = cmd + cmds;
1555    
1556    memory = cq->ptrs.limit;
1557  }
1558  
1559  PRINTD (DBG_TX, "TX queue pair at %p", memory);
1560  
1561  {
1562    tx_in * in = memory;
1563    tx_out * out;
1564    amb_txq * txq = &dev->txq;
1565    
1566    txq->pending = 0;
1567    txq->high = 0;
1568    txq->filled = 0;
1569    txq->maximum = txs - 1;
1570    
1571    txq->in.start = in;
1572    txq->in.ptr = in;
1573    txq->in.limit = in + txs;
1574    
1575    memory = txq->in.limit;
1576    out = memory;
1577    
1578    txq->out.start = out;
1579    txq->out.ptr = out;
1580    txq->out.limit = out + txs;
1581    
1582    memory = txq->out.limit;
1583  }
1584  
1585  PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1586  
1587  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1588    rx_in * in = memory;
1589    rx_out * out;
1590    amb_rxq * rxq = &dev->rxq[pool];
1591    
1592    rxq->buffer_size = rx_buffer_sizes[pool];
1593    rxq->buffers_wanted = 0;
1594    
1595    rxq->pending = 0;
1596    rxq->low = rxs[pool] - 1;
1597    rxq->emptied = 0;
1598    rxq->maximum = rxs[pool] - 1;
1599    
1600    rxq->in.start = in;
1601    rxq->in.ptr = in;
1602    rxq->in.limit = in + rxs[pool];
1603    
1604    memory = rxq->in.limit;
1605    out = memory;
1606    
1607    rxq->out.start = out;
1608    rxq->out.ptr = out;
1609    rxq->out.limit = out + rxs[pool];
1610    
1611    memory = rxq->out.limit;
1612  }
1613  
1614  if (memory == limit) {
1615    return 0;
1616  } else {
1617    PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1618    kfree (limit - total);
1619    return -ENOMEM;
1620  }
1621  
1622}
1623
1624/********** destruction of communication queues **********/
1625
1626static void destroy_queues (amb_dev * dev) {
1627  // all queues assumed empty
1628  void * memory = dev->cq.ptrs.start;
1629  // includes txq.in, txq.out, rxq[].in and rxq[].out
1630  
1631  PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1632  
1633  PRINTD (DBG_INIT, "freeing queues at %p", memory);
1634  kfree (memory);
1635  
1636  return;
1637}
1638
1639/********** basic loader commands and error handling **********/
1640// centisecond timeouts - guessing away here
1641static unsigned int command_timeouts [] = {
1642	[host_memory_test]     = 15,
1643	[read_adapter_memory]  = 2,
1644	[write_adapter_memory] = 2,
1645	[adapter_start]        = 50,
1646	[get_version_number]   = 10,
1647	[interrupt_host]       = 1,
1648	[flash_erase_sector]   = 1,
1649	[adap_download_block]  = 1,
1650	[adap_erase_flash]     = 1,
1651	[adap_run_in_iram]     = 1,
1652	[adap_end_download]    = 1
1653};
1654
1655
1656static unsigned int command_successes [] = {
1657	[host_memory_test]     = COMMAND_PASSED_TEST,
1658	[read_adapter_memory]  = COMMAND_READ_DATA_OK,
1659	[write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1660	[adapter_start]        = COMMAND_COMPLETE,
1661	[get_version_number]   = COMMAND_COMPLETE,
1662	[interrupt_host]       = COMMAND_COMPLETE,
1663	[flash_erase_sector]   = COMMAND_COMPLETE,
1664	[adap_download_block]  = COMMAND_COMPLETE,
1665	[adap_erase_flash]     = COMMAND_COMPLETE,
1666	[adap_run_in_iram]     = COMMAND_COMPLETE,
1667	[adap_end_download]    = COMMAND_COMPLETE
1668};
1669  
1670static  int decode_loader_result (loader_command cmd, u32 result)
1671{
1672	int res;
1673	const char *msg;
1674
1675	if (result == command_successes[cmd])
1676		return 0;
1677
1678	switch (result) {
1679		case BAD_COMMAND:
1680			res = -EINVAL;
1681			msg = "bad command";
1682			break;
1683		case COMMAND_IN_PROGRESS:
1684			res = -ETIMEDOUT;
1685			msg = "command in progress";
1686			break;
1687		case COMMAND_PASSED_TEST:
1688			res = 0;
1689			msg = "command passed test";
1690			break;
1691		case COMMAND_FAILED_TEST:
1692			res = -EIO;
1693			msg = "command failed test";
1694			break;
1695		case COMMAND_READ_DATA_OK:
1696			res = 0;
1697			msg = "command read data ok";
1698			break;
1699		case COMMAND_READ_BAD_ADDRESS:
1700			res = -EINVAL;
1701			msg = "command read bad address";
1702			break;
1703		case COMMAND_WRITE_DATA_OK:
1704			res = 0;
1705			msg = "command write data ok";
1706			break;
1707		case COMMAND_WRITE_BAD_ADDRESS:
1708			res = -EINVAL;
1709			msg = "command write bad address";
1710			break;
1711		case COMMAND_WRITE_FLASH_FAILURE:
1712			res = -EIO;
1713			msg = "command write flash failure";
1714			break;
1715		case COMMAND_COMPLETE:
1716			res = 0;
1717			msg = "command complete";
1718			break;
1719		case COMMAND_FLASH_ERASE_FAILURE:
1720			res = -EIO;
1721			msg = "command flash erase failure";
1722			break;
1723		case COMMAND_WRITE_BAD_DATA:
1724			res = -EINVAL;
1725			msg = "command write bad data";
1726			break;
1727		default:
1728			res = -EINVAL;
1729			msg = "unknown error";
1730			PRINTD (DBG_LOAD|DBG_ERR,
1731				"decode_loader_result got %d=%x !",
1732				result, result);
1733			break;
1734	}
1735
1736	PRINTK (KERN_ERR, "%s", msg);
1737	return res;
1738}
1739
1740static int __devinit do_loader_command (volatile loader_block * lb,
1741				     const amb_dev * dev, loader_command cmd) {
1742  
1743  unsigned long timeout;
1744  
1745  PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1746  
1747  /* do a command
1748     
1749     Set the return value to zero, set the command type and set the
1750     valid entry to the right magic value. The payload is already
1751     correctly byte-ordered so we leave it alone. Hit the doorbell
1752     with the bus address of this structure.
1753     
1754  */
1755  
1756  lb->result = 0;
1757  lb->command = cpu_to_be32 (cmd);
1758  lb->valid = cpu_to_be32 (DMA_VALID);
1759  // dump_registers (dev);
1760  // dump_loader_block (lb);
1761  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1762  
1763  timeout = command_timeouts[cmd] * 10;
1764  
1765  while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1766    if (timeout) {
1767      timeout = msleep_interruptible(timeout);
1768    } else {
1769      PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1770      dump_registers (dev);
1771      dump_loader_block (lb);
1772      return -ETIMEDOUT;
1773    }
1774  
1775  if (cmd == adapter_start) {
1776    // wait for start command to acknowledge...
1777    timeout = 100;
1778    while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1779      if (timeout) {
1780	timeout = msleep_interruptible(timeout);
1781      } else {
1782	PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1783		be32_to_cpu (lb->result));
1784	dump_registers (dev);
1785	return -ETIMEDOUT;
1786      }
1787    return 0;
1788  } else {
1789    return decode_loader_result (cmd, be32_to_cpu (lb->result));
1790  }
1791  
1792}
1793
1794/* loader: determine loader version */
1795
1796static int __devinit get_loader_version (loader_block * lb,
1797				      const amb_dev * dev, u32 * version) {
1798  int res;
1799  
1800  PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1801  
1802  res = do_loader_command (lb, dev, get_version_number);
1803  if (res)
1804    return res;
1805  if (version)
1806    *version = be32_to_cpu (lb->payload.version);
1807  return 0;
1808}
1809
1810/* loader: write memory data blocks */
1811
1812static int __devinit loader_write (loader_block* lb,
1813				   const amb_dev *dev,
1814				   const struct ihex_binrec *rec) {
1815  transfer_block * tb = &lb->payload.transfer;
1816  
1817  PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1818
1819  tb->address = rec->addr;
1820  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1821  memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
1822  return do_loader_command (lb, dev, write_adapter_memory);
1823}
1824
1825/* loader: verify memory data blocks */
1826
1827static int __devinit loader_verify (loader_block * lb,
1828				    const amb_dev *dev,
1829				    const struct ihex_binrec *rec) {
1830  transfer_block * tb = &lb->payload.transfer;
1831  int res;
1832  
1833  PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1834  
1835  tb->address = rec->addr;
1836  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1837  res = do_loader_command (lb, dev, read_adapter_memory);
1838  if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
1839    res = -EINVAL;
1840  return res;
1841}
1842
1843/* loader: start microcode */
1844
1845static int __devinit loader_start (loader_block * lb,
1846				const amb_dev * dev, u32 address) {
1847  PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1848  
1849  lb->payload.start = cpu_to_be32 (address);
1850  return do_loader_command (lb, dev, adapter_start);
1851}
1852
1853/********** reset card **********/
1854
1855static inline void sf (const char * msg)
1856{
1857	PRINTK (KERN_ERR, "self-test failed: %s", msg);
1858}
1859
1860static int amb_reset (amb_dev * dev, int diags) {
1861  u32 word;
1862  
1863  PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1864  
1865  word = rd_plain (dev, offsetof(amb_mem, reset_control));
1866  // put card into reset state
1867  wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1868  // wait a short while
1869  udelay (10);
1870#if 1
1871  // put card into known good state
1872  wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1873  // clear all interrupts just in case
1874  wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1875#endif
1876  // clear self-test done flag
1877  wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1878  // take card out of reset state
1879  wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1880  
1881  if (diags) { 
1882    unsigned long timeout;
1883    // 4.2 second wait
1884    msleep(4200);
1885    // half second time-out
1886    timeout = 500;
1887    while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1888      if (timeout) {
1889	timeout = msleep_interruptible(timeout);
1890      } else {
1891	PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1892	return -ETIMEDOUT;
1893      }
1894    
1895    // get results of self-test
1896    // XXX double check byte-order
1897    word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1898    if (word & SELF_TEST_FAILURE) {
1899      if (word & GPINT_TST_FAILURE)
1900	sf ("interrupt");
1901      if (word & SUNI_DATA_PATTERN_FAILURE)
1902	sf ("SUNI data pattern");
1903      if (word & SUNI_DATA_BITS_FAILURE)
1904	sf ("SUNI data bits");
1905      if (word & SUNI_UTOPIA_FAILURE)
1906	sf ("SUNI UTOPIA interface");
1907      if (word & SUNI_FIFO_FAILURE)
1908	sf ("SUNI cell buffer FIFO");
1909      if (word & SRAM_FAILURE)
1910	sf ("bad SRAM");
1911      // better return value?
1912      return -EIO;
1913    }
1914    
1915  }
1916  return 0;
1917}
1918
1919/********** transfer and start the microcode **********/
1920
1921static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1922  const struct firmware *fw;
1923  unsigned long start_address;
1924  const struct ihex_binrec *rec;
1925  const char *errmsg = 0;
1926  int res;
1927
1928  res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
1929  if (res) {
1930    PRINTK (KERN_ERR, "Cannot load microcode data");
1931    return res;
1932  }
1933
1934  /* First record contains just the start address */
1935  rec = (const struct ihex_binrec *)fw->data;
1936  if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
1937    errmsg = "no start record";
1938    goto fail;
1939  }
1940  start_address = be32_to_cpup((__be32 *)rec->data);
1941
1942  rec = ihex_next_binrec(rec);
1943
1944  PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1945
1946  while (rec) {
1947    PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
1948	    be16_to_cpu(rec->len));
1949    if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
1950	    errmsg = "record too long";
1951	    goto fail;
1952    }
1953    if (be16_to_cpu(rec->len) & 3) {
1954	    errmsg = "odd number of bytes";
1955	    goto fail;
1956    }
1957    res = loader_write(lb, dev, rec);
1958    if (res)
1959      break;
1960
1961    res = loader_verify(lb, dev, rec);
1962    if (res)
1963      break;
1964  }
1965  release_firmware(fw);
1966  if (!res)
1967    res = loader_start(lb, dev, start_address);
1968
1969  return res;
1970fail:
1971  release_firmware(fw);
1972  PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
1973  return -EINVAL;
1974}
1975
1976/********** give adapter parameters **********/
1977  
1978static inline __be32 bus_addr(void * addr) {
1979    return cpu_to_be32 (virt_to_bus (addr));
1980}
1981
1982static int __devinit amb_talk (amb_dev * dev) {
1983  adap_talk_block a;
1984  unsigned char pool;
1985  unsigned long timeout;
1986  
1987  PRINTD (DBG_FLOW, "amb_talk %p", dev);
1988  
1989  a.command_start = bus_addr (dev->cq.ptrs.start);
1990  a.command_end   = bus_addr (dev->cq.ptrs.limit);
1991  a.tx_start      = bus_addr (dev->txq.in.start);
1992  a.tx_end        = bus_addr (dev->txq.in.limit);
1993  a.txcom_start   = bus_addr (dev->txq.out.start);
1994  a.txcom_end     = bus_addr (dev->txq.out.limit);
1995  
1996  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1997    // the other "a" items are set up by the adapter
1998    a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
1999    a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2000    a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2001    a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2002    a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2003  }
2004  
2005#ifdef AMB_NEW_MICROCODE
2006  // disable fast PLX prefetching
2007  a.init_flags = 0;
2008#endif
2009  
2010  // pass the structure
2011  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2012  
2013  // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2014  msleep(2200);
2015  // give the adapter another half second?
2016  timeout = 500;
2017  while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2018    if (timeout) {
2019      timeout = msleep_interruptible(timeout);
2020    } else {
2021      PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2022      return -ETIMEDOUT;
2023    }
2024  
2025  return 0;
2026}
2027
2028// get microcode version
2029static void __devinit amb_ucode_version (amb_dev * dev) {
2030  u32 major;
2031  u32 minor;
2032  command cmd;
2033  cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2034  while (command_do (dev, &cmd)) {
2035    set_current_state(TASK_UNINTERRUPTIBLE);
2036    schedule();
2037  }
2038  major = be32_to_cpu (cmd.args.version.major);
2039  minor = be32_to_cpu (cmd.args.version.minor);
2040  PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2041}
2042  
2043// get end station address
2044static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2045  u32 lower4;
2046  u16 upper2;
2047  command cmd;
2048  
2049  cmd.request = cpu_to_be32 (SRB_GET_BIA);
2050  while (command_do (dev, &cmd)) {
2051    set_current_state(TASK_UNINTERRUPTIBLE);
2052    schedule();
2053  }
2054  lower4 = be32_to_cpu (cmd.args.bia.lower4);
2055  upper2 = be32_to_cpu (cmd.args.bia.upper2);
2056  PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2057  
2058  if (esi) {
2059    unsigned int i;
2060    
2061    PRINTDB (DBG_INIT, "ESI:");
2062    for (i = 0; i < ESI_LEN; ++i) {
2063      if (i < 4)
2064	  esi[i] = bitrev8(lower4>>(8*i));
2065      else
2066	  esi[i] = bitrev8(upper2>>(8*(i-4)));
2067      PRINTDM (DBG_INIT, " %02x", esi[i]);
2068    }
2069    
2070    PRINTDE (DBG_INIT, "");
2071  }
2072  
2073  return;
2074}
2075  
2076static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2077{
2078	// fix up the PLX-mapped window base address to match the block
2079	unsigned long blb;
2080	u32 mapreg;
2081	blb = virt_to_bus(lb);
2082	// the kernel stack had better not ever cross a 1Gb boundary!
2083	mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2084	mapreg &= ~onegigmask;
2085	mapreg |= blb & onegigmask;
2086	wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2087	return;
2088}
2089
2090static int __devinit amb_init (amb_dev * dev)
2091{
2092  loader_block lb;
2093  
2094  u32 version;
2095  
2096  if (amb_reset (dev, 1)) {
2097    PRINTK (KERN_ERR, "card reset failed!");
2098  } else {
2099    fixup_plx_window (dev, &lb);
2100    
2101    if (get_loader_version (&lb, dev, &version)) {
2102      PRINTK (KERN_INFO, "failed to get loader version");
2103    } else {
2104      PRINTK (KERN_INFO, "loader version is %08x", version);
2105      
2106      if (ucode_init (&lb, dev)) {
2107	PRINTK (KERN_ERR, "microcode failure");
2108      } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2109	PRINTK (KERN_ERR, "failed to get memory for queues");
2110      } else {
2111	
2112	if (amb_talk (dev)) {
2113	  PRINTK (KERN_ERR, "adapter did not accept queues");
2114	} else {
2115	  
2116	  amb_ucode_version (dev);
2117	  return 0;
2118	  
2119	} /* amb_talk */
2120	
2121	destroy_queues (dev);
2122      } /* create_queues, ucode_init */
2123      
2124      amb_reset (dev, 0);
2125    } /* get_loader_version */
2126    
2127  } /* amb_reset */
2128  
2129  return -EINVAL;
2130}
2131
2132static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) 
2133{
2134      unsigned char pool;
2135      
2136      // set up known dev items straight away
2137      dev->pci_dev = pci_dev; 
2138      pci_set_drvdata(pci_dev, dev);
2139      
2140      dev->iobase = pci_resource_start (pci_dev, 1);
2141      dev->irq = pci_dev->irq; 
2142      dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2143      
2144      // flags (currently only dead)
2145      dev->flags = 0;
2146      
2147      // Allocate cell rates (fibre)
2148      // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2149      // to be really pedantic, this should be ATM_OC3c_PCR
2150      dev->tx_avail = ATM_OC3_PCR;
2151      dev->rx_avail = ATM_OC3_PCR;
2152      
2153      // semaphore for txer/rxer modifications - we cannot use a
2154      // spinlock as the critical region needs to switch processes
2155      mutex_init(&dev->vcc_sf);
2156      // queue manipulation spinlocks; we want atomic reads and
2157      // writes to the queue descriptors (handles IRQ and SMP)
2158      // consider replacing "int pending" -> "atomic_t available"
2159      // => problem related to who gets to move queue pointers
2160      spin_lock_init (&dev->cq.lock);
2161      spin_lock_init (&dev->txq.lock);
2162      for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2163	spin_lock_init (&dev->rxq[pool].lock);
2164}
2165
2166static void setup_pci_dev(struct pci_dev *pci_dev)
2167{
2168	unsigned char lat;
2169      
2170	// enable bus master accesses
2171	pci_set_master(pci_dev);
2172
2173	// frobnicate latency (upwards, usually)
2174	pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2175
2176	if (!pci_lat)
2177		pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2178
2179	if (lat != pci_lat) {
2180		PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2181			lat, pci_lat);
2182		pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2183	}
2184}
2185
2186static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2187{
2188	amb_dev * dev;
2189	int err;
2190	unsigned int irq;
2191      
2192	err = pci_enable_device(pci_dev);
2193	if (err < 0) {
2194		PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2195		goto out;
2196	}
2197
2198	// read resources from PCI configuration space
2199	irq = pci_dev->irq;
2200
2201	if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2202		PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2203		err = -EINVAL;
2204		goto out_disable;
2205	}
2206
2207	PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2208		" IO %llx, IRQ %u, MEM %p",
2209		(unsigned long long)pci_resource_start(pci_dev, 1),
2210		irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2211
2212	// check IO region
2213	err = pci_request_region(pci_dev, 1, DEV_LABEL);
2214	if (err < 0) {
2215		PRINTK (KERN_ERR, "IO range already in use!");
2216		goto out_disable;
2217	}
2218
2219	dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
2220	if (!dev) {
2221		PRINTK (KERN_ERR, "out of memory!");
2222		err = -ENOMEM;
2223		goto out_release;
2224	}
2225
2226	setup_dev(dev, pci_dev);
2227
2228	err = amb_init(dev);
2229	if (err < 0) {
2230		PRINTK (KERN_ERR, "adapter initialisation failure");
2231		goto out_free;
2232	}
2233
2234	setup_pci_dev(pci_dev);
2235
2236	// grab (but share) IRQ and install handler
2237	err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2238	if (err < 0) {
2239		PRINTK (KERN_ERR, "request IRQ failed!");
2240		goto out_reset;
2241	}
2242
2243	dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1,
2244					 NULL);
2245	if (!dev->atm_dev) {
2246		PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2247		err = -EINVAL;
2248		goto out_free_irq;
2249	}
2250
2251	PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2252		dev->atm_dev->number, dev, dev->atm_dev);
2253		dev->atm_dev->dev_data = (void *) dev;
2254
2255	// register our address
2256	amb_esi (dev, dev->atm_dev->esi);
2257
2258	// 0 bits for vpi, 10 bits for vci
2259	dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2260	dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2261
2262	init_timer(&dev->housekeeping);
2263	dev->housekeeping.function = do_housekeeping;
2264	dev->housekeeping.data = (unsigned long) dev;
2265	mod_timer(&dev->housekeeping, jiffies);
2266
2267	// enable host interrupts
2268	interrupts_on (dev);
2269
2270out:
2271	return err;
2272
2273out_free_irq:
2274	free_irq(irq, dev);
2275out_reset:
2276	amb_reset(dev, 0);
2277out_free:
2278	kfree(dev);
2279out_release:
2280	pci_release_region(pci_dev, 1);
2281out_disable:
2282	pci_disable_device(pci_dev);
2283	goto out;
2284}
2285
2286
2287static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2288{
2289	struct amb_dev *dev;
2290
2291	dev = pci_get_drvdata(pci_dev);
2292
2293	PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2294	del_timer_sync(&dev->housekeeping);
2295	// the drain should not be necessary
2296	drain_rx_pools(dev);
2297	interrupts_off(dev);
2298	amb_reset(dev, 0);
2299	free_irq(dev->irq, dev);
2300	pci_disable_device(pci_dev);
2301	destroy_queues(dev);
2302	atm_dev_deregister(dev->atm_dev);
2303	kfree(dev);
2304	pci_release_region(pci_dev, 1);
2305}
2306
2307static void __init amb_check_args (void) {
2308  unsigned char pool;
2309  unsigned int max_rx_size;
2310  
2311#ifdef DEBUG_AMBASSADOR
2312  PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2313#else
2314  if (debug)
2315    PRINTK (KERN_NOTICE, "no debugging support");
2316#endif
2317  
2318  if (cmds < MIN_QUEUE_SIZE)
2319    PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2320	    cmds = MIN_QUEUE_SIZE);
2321  
2322  if (txs < MIN_QUEUE_SIZE)
2323    PRINTK (KERN_NOTICE, "txs has been raised to %u",
2324	    txs = MIN_QUEUE_SIZE);
2325  
2326  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2327    if (rxs[pool] < MIN_QUEUE_SIZE)
2328      PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2329	      pool, rxs[pool] = MIN_QUEUE_SIZE);
2330  
2331  // buffers sizes should be greater than zero and strictly increasing
2332  max_rx_size = 0;
2333  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2334    if (rxs_bs[pool] <= max_rx_size)
2335      PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2336	      pool, rxs_bs[pool]);
2337    else
2338      max_rx_size = rxs_bs[pool];
2339  
2340  if (rx_lats < MIN_RX_BUFFERS)
2341    PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2342	    rx_lats = MIN_RX_BUFFERS);
2343  
2344  return;
2345}
2346
2347/********** module stuff **********/
2348
2349MODULE_AUTHOR(maintainer_string);
2350MODULE_DESCRIPTION(description_string);
2351MODULE_LICENSE("GPL");
2352MODULE_FIRMWARE("atmsar11.fw");
2353module_param(debug,   ushort, 0644);
2354module_param(cmds,    uint, 0);
2355module_param(txs,     uint, 0);
2356module_param_array(rxs,     uint, NULL, 0);
2357module_param_array(rxs_bs,  uint, NULL, 0);
2358module_param(rx_lats, uint, 0);
2359module_param(pci_lat, byte, 0);
2360MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2361MODULE_PARM_DESC(cmds,    "number of command queue entries");
2362MODULE_PARM_DESC(txs,     "number of TX queue entries");
2363MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2364MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2365MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2366MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2367
2368/********** module entry **********/
2369
2370static struct pci_device_id amb_pci_tbl[] = {
2371	{ PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
2372	{ PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
2373	{ 0, }
2374};
2375
2376MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2377
2378static struct pci_driver amb_driver = {
2379	.name =		"amb",
2380	.probe =	amb_probe,
2381	.remove =	__devexit_p(amb_remove_one),
2382	.id_table =	amb_pci_tbl,
2383};
2384
2385static int __init amb_module_init (void)
2386{
2387  PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2388  
2389  // sanity check - cast needed as printk does not support %Zu
2390  if (sizeof(amb_mem) != 4*16 + 4*12) {
2391    PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2392	    (unsigned long) sizeof(amb_mem));
2393    return -ENOMEM;
2394  }
2395  
2396  show_version();
2397  
2398  amb_check_args();
2399  
2400  // get the juice
2401  return pci_register_driver(&amb_driver);
2402}
2403
2404/********** module exit **********/
2405
2406static void __exit amb_module_exit (void)
2407{
2408  PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2409
2410  pci_unregister_driver(&amb_driver);
2411}
2412
2413module_init(amb_module_init);
2414module_exit(amb_module_exit);