 #include <asm/set_memory.h>

 #ifdef CONFIG_X86_64
+#include <linux/cleanup.h>

 #include <asm/apic.h>
 #include <uapi/asm/mtrr.h>
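
The only functional change in this hunk is the new <linux/cleanup.h> include: it pulls in the kernel's scope-based cleanup helpers (the guard() family), which later hunks use to take redirected_intr_lock. A guard acquires the mutex where it is declared and drops it automatically when the enclosing scope is left, so the early returns in the new code need no explicit unlock. A minimal sketch of the pattern follows; the helper name redirected_intr_count is hypothetical, while the lock, list and struct come from later in this patch.

/* Illustration only: counting list entries under the guard-held mutex. */
static int redirected_intr_count(void)
{
        struct redirected_intr *rintr;
        int count = 0;

        guard(mutex)(&redirected_intr_lock);    /* mutex_lock() happens here... */
        list_for_each_entry(rintr, &redirected_intr_list, list)
                count++;

        return count;                           /* ...mutex_unlock() runs automatically here */
}
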
@@ -221,19 +222,29 @@ struct apicid_to_cpuid_entry {
         struct hlist_node node;
 };

+static int get_cpuid(int apicid)
+{
+        struct apicid_to_cpuid_entry *found;
+
+        hash_for_each_possible(apicid_to_cpuid, found, node, apicid) {
+                if (found->apicid == apicid)
+                        return found->cpuid;
+        }
+
+        return -EINVAL;
+}
+
 /*
  * Sets the cpu described by apicid in cpu_mask.
  * Returns 0 on success, -EINVAL if no cpu matches the apicid.
  */
 static int mshv_tdx_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask)
 {
-        struct apicid_to_cpuid_entry *found;

-        hash_for_each_possible(apicid_to_cpuid, found, node, apicid) {
-                if (found->apicid != apicid)
-                        continue;
+        int cpu = get_cpuid(apicid);

-                cpumask_set_cpu(found->cpuid, cpu_mask);
+        if (cpu >= 0) {
+                cpumask_set_cpu(cpu, cpu_mask);
                 return 0;
         }

@@ -432,13 +443,22 @@ static int mshv_vtl_get_vsm_regs(void)
         return ret;
 }

+static void do_assert_single_proxy_intr(const u32 vector, struct mshv_vtl_run *run)
+{
+        /* See mshv_tdx_handle_simple_icr_write() on how the bank and bit are computed. */
+        const u32 bank = vector >> 5;
+        const u32 masked_irr = BIT(vector & 0x1f) & ~READ_ONCE(run->proxy_irr_blocked[bank]);
+
+        /* nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write */
+        atomic_or(masked_irr, (atomic_t *)&run->proxy_irr[bank]);
+}
+
 static void mshv_vtl_scan_proxy_interrupts(struct hv_per_cpu_context *per_cpu)
 {
         struct hv_message *msg;
         u32 message_type;
         struct hv_x64_proxy_interrupt_message_payload *proxy;
         struct mshv_vtl_run *run;
-        u32 vector;

         msg = (struct hv_message *)per_cpu->synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
         for (;;) {
@@ -468,13 +488,9 @@ static void mshv_vtl_scan_proxy_interrupts(struct hv_per_cpu_context *per_cpu)
                         }
                 } else {
                         /* A malicious hypervisor might set a vector > 255. */
-                        vector = READ_ONCE(proxy->u.asserted_vector) & 0xff;
-                        const u32 bank = vector / 32;
-                        const u32 masked_irr = BIT(vector % 32) &
-                                               ~READ_ONCE(run->proxy_irr_blocked[bank]);
+                        const u32 vector = READ_ONCE(proxy->u.asserted_vector) & 0xff;

-                        /* nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write */
-                        atomic_or(masked_irr, (atomic_t *)&run->proxy_irr[bank]);
+                        do_assert_single_proxy_intr(vector, run);
                 }

                 WRITE_ONCE(run->scan_proxy_irr, 1);
@@ -1122,8 +1138,8 @@ static int mshv_tdx_handle_simple_icr_write(struct tdx_vp_context *context)
         const u32 dest = context->l2_enter_guest_state.rdx;
         const u8 shorthand = (icr_lo >> 18) & 0b11;
         const u8 vector = icr_lo;
-        const u64 bank = vector / 32;
-        const u32 mask = BIT(vector % 32);
+        const u64 bank = vector >> 5;           /* Each bank is 32 bits. Divide by 32 to find the bank. */
+        const u32 mask = BIT(vector & 0x1f);    /* Bit in the bank is the remainder of the division. */
         const u32 self = smp_processor_id();

         bool send_ipi = false;
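
For reference, the proxy IRR is organized as 32-bit banks, so a vector splits into a bank index (vector >> 5) and a bit inside that bank (vector & 0x1f). A worked example, illustration only (the example_* names are not part of the patch):

        const u8 example_vector = 0x23;                         /* vector 35 */
        const u64 example_bank = example_vector >> 5;           /* 35 / 32 == 1 */
        const u32 example_mask = BIT(example_vector & 0x1f);    /* BIT(35 % 32) == BIT(3) == 0x8 */
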
@@ -1569,6 +1585,234 @@ static long mshv_vtl_ioctl_read_vmx_cr4_fixed1(void __user *user_arg)

         return copy_to_user(user_arg, &value, sizeof(value)) ? -EFAULT : 0;
 }
+
+static int hyperv_vtl_redirected_intr_alloc(struct irq_domain *domain, unsigned int virq,
+                                            unsigned int nr_irqs, void *arg)
+{
+        int ret;
+
+        ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+        if (ret < 0)
+                return ret;
+
+        /*
+         * The dummy chip does not have irq_set_affinity(). The affinity of an
+         * IRQ cannot be changed after initialization (see
+         * __irq_can_set_affinity()).
+         */
+        irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+
+        return 0;
+}
+
+static const struct irq_domain_ops hyperv_vtl_redirected_intr_ops = {
+        .alloc = hyperv_vtl_redirected_intr_alloc,
+        .free = irq_domain_free_irqs_common,
+};
+
+#define REDIRECTED_INTR_NAME_LEN 64
+struct redirected_intr {
+        int irq;
+        int proxy_vector;
+        u32 apic_id;
+        char name[REDIRECTED_INTR_NAME_LEN];
+        struct list_head list;
+};
+
+static struct list_head redirected_intr_list;
+static struct mutex redirected_intr_lock;
+
+static struct irq_domain *redirected_intr_domain;
+static struct fwnode_handle *redirected_intr_fwnode;
+
+static int __init ms_hyperv_init_redirected_intr(void)
+{
+        redirected_intr_fwnode = irq_domain_alloc_named_fwnode("hyperv-redirected-intr");
+        if (!redirected_intr_fwnode)
+                return -ENODEV;
+
+        redirected_intr_domain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
+                                                             redirected_intr_fwnode,
+                                                             &hyperv_vtl_redirected_intr_ops,
+                                                             NULL);
+        if (!redirected_intr_domain) {
+                irq_domain_free_fwnode(redirected_intr_fwnode);
+                return -ENODEV;
+        }
+
+        INIT_LIST_HEAD(&redirected_intr_list);
+        mutex_init(&redirected_intr_lock);
+
+        return 0;
+}
+
+static void ms_hyperv_free_redirected_intr(void)
+{
+        struct redirected_intr *rintr, *tmp;
+
+        guard(mutex)(&redirected_intr_lock);
+        if (!redirected_intr_domain)
+                return;
+
+        list_for_each_entry_safe(rintr, tmp, &redirected_intr_list, list) {
+                free_irq(rintr->irq, rintr);
+                list_del(&rintr->list);
+                kfree(rintr);
+        }
+
+        irq_domain_remove(redirected_intr_domain);
+        irq_domain_free_fwnode(redirected_intr_fwnode);
+        redirected_intr_domain = NULL;
+        redirected_intr_fwnode = NULL;
+}
+
+static irqreturn_t handle_single_proxy_intr(int irq, void *data)
+{
+        struct mshv_vtl_run *run = mshv_vtl_this_run();
+        struct redirected_intr *rintr = data;
+
+        /*
+         * The redirected interrupt fired: mark its proxy vector pending in
+         * the run page's IRR so it is handled on the next proxy IRR scan.
+         */
+        do_assert_single_proxy_intr(rintr->proxy_vector, run);
+        WRITE_ONCE(run->scan_proxy_irr, 1);
+
+        apic_eoi();
+
+        return IRQ_HANDLED;
+}
+
+/* Must be called with redirected_intr_lock held. */
+static struct redirected_intr *find_redirect_intr(u32 proxy_vector, u32 apic_id)
+{
+        struct redirected_intr *rintr;
+
+        list_for_each_entry(rintr, &redirected_intr_list, list) {
+                if (rintr->proxy_vector == proxy_vector &&
+                    rintr->apic_id == apic_id)
+                        return rintr;
+        }
+
+        return NULL;
+}
+
+static int mshv_vtl_map_redirected_intr(u32 proxy_vector, u32 apic_id)
+{
+        struct irq_affinity_desc affinity_desc = {};
+        struct irq_alloc_info info = {};
+        struct redirected_intr *rintr;
+        int irq, ret, cpu;
+
+        if (proxy_vector > 255)
+                return -EINVAL;
+
+        cpu = get_cpuid(apic_id);
+        if (cpu < 0 || !cpu_online(cpu))
+                return -EINVAL;
+
+        guard(mutex)(&redirected_intr_lock);
+
+        rintr = find_redirect_intr(proxy_vector, apic_id);
+        if (rintr)
+                /* Already mapped. Just return the HW vector we are using. */
+                return irq_cfg(rintr->irq)->vector;
+
+        rintr = kzalloc(sizeof(*rintr), GFP_KERNEL);
+        if (!rintr)
+                return -ENOMEM;
+
+        cpumask_set_cpu(cpu, &affinity_desc.mask);
+
+        /* The x86_vector_domain needs a non-NULL info. */
+        irq = __irq_domain_alloc_irqs(redirected_intr_domain, -1, 1, NUMA_NO_NODE,
+                                      &info, false, &affinity_desc);
+        if (irq < 0) {
+                ret = irq;
+                goto out;
+        }
+
+        snprintf(rintr->name, REDIRECTED_INTR_NAME_LEN,
+                 "hyperv-redir-intr-%x.%x", apic_id, proxy_vector);
+        /*
+         * We do not want the IRQ to be moved to a different CPU. Both user
+         * space and the hypervisor have agreed on the CPU that the interrupt
+         * should target.
+         */
+        ret = request_irq(irq, handle_single_proxy_intr, IRQF_NOBALANCING,
+                          rintr->name, rintr);
+        if (ret)
+                goto out;
+
+        rintr->irq = irq;
+        rintr->proxy_vector = proxy_vector;
+        rintr->apic_id = apic_id;
+        INIT_LIST_HEAD(&rintr->list);
+        list_add(&rintr->list, &redirected_intr_list);
+
+        return irq_cfg(irq)->vector;
+
+out:
+        kfree(rintr);
+        return ret;
+}
+
+static int mshv_vtl_unmap_redirected_intr(u32 hw_vector, u32 apic_id)
+{
+        struct redirected_intr *rintr;
+
+        if (hw_vector > 255)
+                return -EINVAL;
+
+        guard(mutex)(&redirected_intr_lock);
+        list_for_each_entry(rintr, &redirected_intr_list, list) {
+                unsigned int vector = irq_cfg(rintr->irq)->vector;
+
+                if (vector == hw_vector && rintr->apic_id == apic_id) {
+                        free_irq(rintr->irq, rintr);
+                        list_del(&rintr->list);
+                        kfree(rintr);
+                        return 0;
+                }
+        }
+
+        return -ENOENT;
+}
+
+static long mshv_vtl_ioctl_setup_redirected_intr(void __user *user_arg)
+{
+        struct mshv_map_device_intr intr_data;
+        int ret;
+
+        if (copy_from_user(&intr_data, user_arg, sizeof(intr_data)))
+                return (long)-EFAULT;
+
+        /* User space provides the hardware vector to unmap. */
+        if (!intr_data.create_mapping)
+                return (long)mshv_vtl_unmap_redirected_intr(intr_data.vector,
+                                                            intr_data.apic_id);
+
+        /*
+         * User space provides the proxy vector it wants to map to a hardware
+         * vector.
+         */
+        ret = mshv_vtl_map_redirected_intr(intr_data.vector, intr_data.apic_id);
+        if (ret < 0)
+                return (long)ret;
+
+        /*
+         * The return value is the hardware vector to which the proxy vector
+         * is mapped.
+         */
+        intr_data.vector = ret;
+        ret = copy_to_user(user_arg, &intr_data, sizeof(intr_data)) ? -EFAULT : 0;
+
+        return (long)ret;
+}
+
+#else
+static inline int ms_hyperv_init_redirected_intr(void) { return 0; }
+static inline void ms_hyperv_free_redirected_intr(void) { }
 #endif

 #if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST)
@@ -2063,6 +2307,9 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
         case MSHV_VTL_READ_VMX_CR4_FIXED1:
                 ret = mshv_vtl_ioctl_read_vmx_cr4_fixed1((void __user *)arg);
                 break;
+        case MSHV_VTL_MAP_REDIRECTED_DEVICE_INTERRUPT:
+                ret = mshv_vtl_ioctl_setup_redirected_intr((void __user *)arg);
+                break;
 #endif

 #if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST)
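
For context, a hypothetical user-space caller of the new ioctl might look like the sketch below. Only the ioctl name and the mshv_map_device_intr fields (vector, apic_id, create_mapping) come from this patch; the uapi header location and the helper name are assumptions, and the file descriptor is whichever mshv_vtl device node user space already drives.

#include <sys/ioctl.h>
#include <linux/mshv.h>         /* assumed location of the uapi definitions */

/*
 * Hypothetical helper: asks the driver to redirect proxy_vector for the CPU
 * with the given APIC ID, and returns the hardware vector chosen by the kernel.
 */
static int map_redirected_intr(int mshv_fd, unsigned int proxy_vector, unsigned int apic_id)
{
        struct mshv_map_device_intr intr = {
                .vector = proxy_vector,         /* proxy vector to map */
                .apic_id = apic_id,             /* target CPU agreed on with the hypervisor */
                .create_mapping = 1,
        };

        if (ioctl(mshv_fd, MSHV_VTL_MAP_REDIRECTED_DEVICE_INTERRUPT, &intr) < 0)
                return -1;

        /* On success the kernel has rewritten .vector with the hardware vector. */
        return intr.vector;
}

Tearing the mapping down later would pass create_mapping = 0 with the hardware vector returned here, matching the unmap path added above.
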
@@ -2814,6 +3061,10 @@ static int __init mshv_vtl_init(void)
                 goto free_mem;
         }

+        ret = ms_hyperv_init_redirected_intr();
+        if (ret)
+                goto free_mem;
+
         mshv_vtl_init_memory();
         mshv_vtl_set_idle(mshv_vtl_idle);

@@ -2843,6 +3094,7 @@ static int __init mshv_vtl_init(void) |
 static void __exit mshv_vtl_exit(void)
 {
         mshv_setup_vtl_func(NULL, NULL, NULL);
+        ms_hyperv_free_redirected_intr();
         mshv_tdx_free_apicid_to_cpuid_mapping();
         misc_deregister(&mshv_vtl_sint_dev);
         misc_deregister(&mshv_vtl_hvcall);
|
0 commit comments