Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions drivers/net/ethernet/cadence/macb.h
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,17 @@ struct macb_queue {
struct work_struct tx_error_task;
bool txubr_pending;
bool tx_pending;

/* TX stall watchdog -- see macb_tx_stall_watchdog() in macb_main.c.
 * tx_stall_tail_moved is set by macb_tx_complete() under tx_ptr_lock
 * whenever tx_tail advances, and cleared under the same lock by the
 * watchdog tick. A bool avoids the index-aliasing false positive that
 * a snapshot-of-tx_tail comparison would produce when the ring index
 * happens to wrap back to the same value between two ticks.
 */
struct delayed_work tx_stall_watchdog_work;
bool tx_stall_tail_moved;

struct napi_struct napi_tx;

dma_addr_t rx_ring_dma;
Expand Down
102 changes: 92 additions & 10 deletions drivers/net/ethernet/cadence/macb_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,8 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
packets, bytes);

queue->tx_tail = tail;
if (packets)
queue->tx_stall_tail_moved = true;
if (__netif_subqueue_stopped(bp->dev, queue_index) &&
CIRC_CNT(queue->tx_head, queue->tx_tail,
bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
Expand Down Expand Up @@ -1949,6 +1951,12 @@ static void macb_tx_restart(struct macb_queue *queue)

spin_lock(&bp->lock);
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
/* Flush the PCIe posted-write queue so the TSTART doorbell
* reliably reaches the MAC. Without this, the write can sit
* in the fabric and the MAC never advances, causing a silent
* TX stall.
*/
(void)macb_readl(bp, NCR);
spin_unlock(&bp->lock);

out_tx_ptr_unlock:
Expand Down Expand Up @@ -1993,17 +2001,24 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
if (work_done < budget && napi_complete_done(napi, work_done)) {
queue_writel(queue, IER, MACB_BIT(TCOMP));

/* Packet completions only seem to propagate to raise
* interrupts when interrupts are enabled at the time, so if
* packets were sent while interrupts were disabled,
* they will not cause another interrupt to be generated when
* interrupts are re-enabled.
* Check for this case here to avoid losing a wakeup. This can
* potentially race with the interrupt handler doing the same
* actions if an interrupt is raised just after enabling them,
* but this should be harmless.
/* TCOMP events that fire while the interrupt is masked do
* not re-fire when IER is re-enabled. Catch this two ways
* to avoid losing a wakeup:
*
* (1) Read ISR -- catches completions the hardware flagged
* but that we did not see as an interrupt. The MMIO
* read doubles as a PCIe read barrier, flushing any
* in-flight descriptor TX_USED DMA writes into memory.
* (2) macb_tx_complete_pending() inspects the ring after
* that flush, catching a descriptor whose TX_USED is
* now visible as a result of the barrier.
*
* This can race with the interrupt handler taking the same
* path if an interrupt fires just after the IER write;
* rescheduling NAPI in that case is harmless.
*/
if (macb_tx_complete_pending(queue)) {
if ((queue_readl(queue, ISR) & MACB_BIT(TCOMP)) ||
macb_tx_complete_pending(queue)) {
queue_writel(queue, IDR, MACB_BIT(TCOMP));
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(TCOMP));
Expand All @@ -2015,6 +2030,63 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
return work_done;
}

#define MACB_TX_STALL_INTERVAL_MS 1000

/* TX stall watchdog.
*
* Defence-in-depth against lost TCOMP interrupts. macb already has a
* recovery chain (tx_pending -> txubr_pending -> macb_tx_restart())
* that fires on TCOMP; if TCOMP itself is lost the TX ring stalls
* silently until something else kicks TSTART. This watchdog runs
* once per second per queue and calls macb_tx_restart() if the ring
* is non-empty and tx_tail has not advanced since the previous tick.
*
* Movement is tracked via the tx_stall_tail_moved boolean rather
* than by snapshotting tx_tail. Per-queue ring indices are bounded
* (and reused), so a snapshot comparison can false-positive when the
* index happens to land on the same value between two ticks under
* sustained load. The boolean is set by macb_tx_complete() whenever
* tx_tail advances and cleared by this watchdog after each tick;
* both writes are under tx_ptr_lock, so no atomic is required.
*
* macb_tx_restart() already checks the hardware's TBQP against the
* driver's head index before re-asserting TSTART, so on a healthy
* ring this is a no-op at the hardware level. The watchdog only
* adds the missing trigger.
*/
static void macb_tx_stall_watchdog(struct work_struct *work)
{
	struct macb_queue *queue = container_of(to_delayed_work(work),
						struct macb_queue,
						tx_stall_watchdog_work);
	struct macb *bp = queue->bp;
	unsigned int head, tail;
	bool progressed;
	unsigned long flags;

	/* Interface is down (or going down): macb_close() cancels this
	 * work, so just stop rescheduling ourselves.
	 */
	if (!netif_running(bp->dev))
		return;

	/* Sample the ring indices and consume the progress flag in one
	 * critical section; macb_tx_complete() sets the flag under the
	 * same tx_ptr_lock whenever tx_tail advances.
	 */
	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
	head = queue->tx_head;
	tail = queue->tx_tail;
	progressed = queue->tx_stall_tail_moved;
	queue->tx_stall_tail_moved = false;
	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);

	/* Stalled = ring has pending descriptors but no completion has
	 * advanced tx_tail since the previous tick.
	 */
	if (head != tail && !progressed) {
		netdev_warn_once(bp->dev,
				 "TX stall detected on queue %u (tail=%u head=%u); re-kicking TSTART\n",
				 (unsigned int)(queue - bp->queues),
				 tail, head);
		macb_tx_restart(queue);
	}

	schedule_delayed_work(&queue->tx_stall_watchdog_work,
			      msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
}

static void macb_hresp_error_task(struct work_struct *work)
{
struct macb *bp = from_work(bp, work, hresp_err_bh_work);
Expand Down Expand Up @@ -2630,6 +2702,10 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
queue->tx_pending = 1;

macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
/* Flush the PCIe posted-write queue; see the comment in
* macb_tx_restart() for the reasoning.
*/
(void)macb_readl(bp, NCR);
spin_unlock(&bp->lock);

if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
Expand Down Expand Up @@ -3277,6 +3353,9 @@ static int macb_open(struct net_device *dev)
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_enable(&queue->napi_rx);
napi_enable(&queue->napi_tx);
queue->tx_stall_tail_moved = true;
schedule_delayed_work(&queue->tx_stall_watchdog_work,
msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
}

macb_init_hw(bp);
Expand Down Expand Up @@ -3323,6 +3402,7 @@ static int macb_close(struct net_device *dev)
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_disable(&queue->napi_rx);
napi_disable(&queue->napi_tx);
cancel_delayed_work_sync(&queue->tx_stall_watchdog_work);
netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
}

Expand Down Expand Up @@ -4921,6 +5001,8 @@ static int macb_init(struct platform_device *pdev)
}

INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
INIT_DELAYED_WORK(&queue->tx_stall_watchdog_work,
macb_tx_stall_watchdog);
q++;
}

Expand Down
Loading