Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions patches-sonic/driver-arista-pci-aer-disable-recovery.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
From: yurypm <yurypm@arista.com>
Date: Mon, 25 May 2026 13:45:51 +0000
Subject: Add noaer_recovery pci kernel boot option

AER error recovery is part of the AER error handling subsystem in
the Linux kernel. AER is enabled by default in the SONiC Linux
kernel. In Arista EOS, AER recovery is disabled for all Arista
devices, so it has never been tested on them. Enabling AER recovery
on large modular systems with a complex PCIe tree could cause
unexpected behavior and side effects, so an option is needed to
disable AER recovery on some chassis.

Add pci=noaer_recovery kernel boot option to disable AER error
recovery when an uncorrectable error is reported.

Signed-off-by: Yury Murashka <yurypm@arista.com>
---
Documentation/admin-guide/kernel-parameters.txt | 4 ++++
drivers/pci/pci.c | 2 ++
drivers/pci/pci.h | 2 ++
drivers/pci/pcie/err.c | 15 +++++++++++++++
4 files changed, 23 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f402bba..2bbd7ab 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4483,6 +4483,10 @@
noaer [PCIE] If the PCIEAER kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of PCIE advanced error reporting.
+ noaer_recovery [PCIE] If the PCIEAER kernel config parameter is
+ enabled, this kernel boot option can be used to
+ disable AER error recovery when an uncorrectable
+ error is reported.
nodomains [PCI] Disable support for multiple PCI
root domains (aka PCI segments, in ACPI-speak).
nommconf [X86] Disable use of MMCONFIG for PCI
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 51a09e4..77d0786 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6896,6 +6896,8 @@ static int __init pci_setup(char *str)
pcie_ats_disabled = true;
} else if (!strcmp(str, "noaer")) {
pci_no_aer();
+ } else if (!strcmp(str, "noaer_recovery")) {
+ pci_no_aer_recovery();
} else if (!strcmp(str, "earlydump")) {
pci_early_dump = true;
} else if (!strncmp(str, "realloc=", 8)) {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 65df6d2..551b6e8 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -826,6 +826,7 @@ static inline void of_pci_remove_node(struct pci_dev *pdev) { }

#ifdef CONFIG_PCIEAER
void pci_no_aer(void);
+void pci_no_aer_recovery(void);
void pci_aer_init(struct pci_dev *dev);
void pci_aer_exit(struct pci_dev *dev);
extern const struct attribute_group aer_stats_attr_group;
@@ -836,6 +837,7 @@ void pci_save_aer_state(struct pci_dev *dev);
void pci_restore_aer_state(struct pci_dev *dev);
#else
static inline void pci_no_aer(void) { }
+static inline void pci_no_aer_recovery(void) { }
static inline void pci_aer_init(struct pci_dev *d) { }
static inline void pci_aer_exit(struct pci_dev *d) { }
static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 3109077..bb5ec0c 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -21,6 +21,13 @@
#include "portdrv.h"
#include "../pci.h"

+static int pcie_aer_recovery_disable; /* nonzero: pci=noaer_recovery given */
+
+void pci_no_aer_recovery(void) /* called by pci_setup() for "noaer_recovery" */
+{
+ pcie_aer_recovery_disable = 1; /* pcie_do_recovery() checks this and returns early */
+}
+
static pci_ers_result_t merge_result(enum pci_ers_result orig,
enum pci_ers_result new)
{
@@ -197,6 +204,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);

+ if (pcie_aer_recovery_disable) {
+ if (host->native_aer || pcie_ports_native) {
+ pcie_clear_device_status(dev);
+ pci_aer_clear_nonfatal_status(dev);
+ }
+ return status;
+ }
+
/*
* If the error was detected by a Root Port, Downstream Port, RCEC,
* or RCiEP, recovery runs on the device itself. For Ports, that
121 changes: 121 additions & 0 deletions patches-sonic/driver-arista-pci-dpc-disable.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
From: yurypm <yurypm@arista.com>
Date: Mon, 25 May 2026 13:45:50 +0000
Subject: Add nodpc pci kernel boot option

PCI DPC (Downstream Port Containment) is enabled by default in the
SONiC Linux kernel. DPC support can be advertised by PCIe devices,
but it might not be fully supported in the firmware. In Arista EOS,
DPC is disabled for all Arista devices, so it has never been tested
on them. Enabling DPC could cause unexpected behavior and side
effects, so an option is needed to disable DPC on some chassis.

Add pci=nodpc kernel boot option to disable PCI DPC.

Signed-off-by: Yury Murashka <yurypm@arista.com>
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/pci/pci.c | 2 ++
drivers/pci/pci.h | 2 ++
drivers/pci/pcie/dpc.c | 16 +++++++++++++---
4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2bbd7ab..068891e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4480,6 +4480,9 @@
through ports 0xC000-0xCFFF).
See http://wiki.osdev.org/PCI for more info
on the configuration access mechanisms.
+ nodpc [PCIE] If the PCIE_DPC kernel config parameter is
+ enabled, this kernel boot option can be used to
+ disable the use of PCIE DPC.
noaer [PCIE] If the PCIEAER kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of PCIE advanced error reporting.
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 77d0786..f6a4c2f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6894,6 +6894,8 @@ static int __init pci_setup(char *str)
} else if (!strncmp(str, "noats", 5)) {
pr_info("PCIe: ATS is disabled\n");
pcie_ats_disabled = true;
+ } else if (!strcmp(str, "nodpc")) {
+ pci_no_dpc();
} else if (!strcmp(str, "noaer")) {
pci_no_aer();
} else if (!strcmp(str, "noaer_recovery")) {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 551b6e8..bece8b9 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -535,6 +535,7 @@ struct rcec_ea {
#endif

#ifdef CONFIG_PCIE_DPC
+void pci_no_dpc(void);
void pci_save_dpc_state(struct pci_dev *dev);
void pci_restore_dpc_state(struct pci_dev *dev);
void pci_dpc_init(struct pci_dev *pdev);
@@ -542,6 +543,7 @@ void dpc_process_error(struct pci_dev *pdev);
pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
bool pci_dpc_recovered(struct pci_dev *pdev);
#else
+static inline void pci_no_dpc(void) { }
static inline void pci_save_dpc_state(struct pci_dev *dev) { }
static inline void pci_restore_dpc_state(struct pci_dev *dev) { }
static inline void pci_dpc_init(struct pci_dev *pdev) { }
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index cdc5431..8eb2a1f 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -43,12 +43,19 @@ static const char * const rp_pio_error_string[] = {
"Memory Request Completion Timeout", /* Bit Position 18 */
};

+static int pcie_dpc_disable; /* nonzero: pci=nodpc given on the command line */
+
+void pci_no_dpc(void) /* called by pci_setup() for the "nodpc" option */
+{
+ pcie_dpc_disable = 1; /* DPC init/save/restore/recovered checks bail out on this */
+}
+
void pci_save_dpc_state(struct pci_dev *dev)
{
struct pci_cap_saved_state *save_state;
u16 *cap;

- if (!pci_is_pcie(dev))
+ if (pcie_dpc_disable || !pci_is_pcie(dev))
return;

save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
@@ -64,7 +71,7 @@ void pci_restore_dpc_state(struct pci_dev *dev)
struct pci_cap_saved_state *save_state;
u16 *cap;

- if (!pci_is_pcie(dev))
+ if (pcie_dpc_disable || !pci_is_pcie(dev))
return;

save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
@@ -104,7 +111,7 @@ bool pci_dpc_recovered(struct pci_dev *pdev)
{
struct pci_host_bridge *host;

- if (!pdev->dpc_cap)
+ if (pcie_dpc_disable || !pdev->dpc_cap)
return false;

/*
@@ -398,6 +405,9 @@ void pci_dpc_init(struct pci_dev *pdev)
{
u16 cap;

+ if (pcie_dpc_disable)
+ return;
+
pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
if (!pdev->dpc_cap)
return;
2 changes: 2 additions & 0 deletions patches-sonic/series
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ driver-arista-pci-reassign-pref-mem.patch
driver-arista-mmcblk-not-working-on-AMD-platforms.patch
driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch
driver-arista-i2c-designware-shutdown.patch
driver-arista-pci-aer-disable-recovery.patch
driver-arista-pci-dpc-disable.patch
driver-support-sff-8436-eeprom.patch
driver-support-sff-8436-eeprom-update.patch
driver-sff-8436-use-nvmem-framework.patch
Expand Down