diff --git a/Cargo.toml b/Cargo.toml index ae96cce1..a19c4326 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ uefi-raw = "=0.9.0" ############# general ############## iommu = [] # supported by: aarch64, riscv64 pci = [] # supported by: aarch64, riscv64,loongarch64 +sriov = ["pci"] # SR-IOV PF/VF enumeration and passthrough print_timestamp = [] # print timestamp when logging ############# IOMMU ############## @@ -65,6 +66,8 @@ ecam_pcie = [] # Standard ECAM mechanism (default for most platforms) dwc_pcie = [] # DesignWare PCIe Core mechanism (CFG0/CFG1, used by RK3568) loongarch64_pcie = [] # LoongArch PCIe mechanism (used by LoongArch platforms) no_pcie_bar_realloc = [] +dwc_msi = ["dwc_pcie"] # DesignWare Native MSI (used when arch doesn't provide MSI, e.g., ARM without ITS) +pci_init_delay = ["dwc_pcie"] # Delay hvisor PCI init until guest DBI offset 0 access virtio_pci = [] # Virtual virtio pci support ############# aarch64 ############## diff --git a/platform/aarch64/imx8mp/board.rs b/platform/aarch64/imx8mp/board.rs index c9b5ece0..ecd6af44 100644 --- a/platform/aarch64/imx8mp/board.rs +++ b/platform/aarch64/imx8mp/board.rs @@ -21,6 +21,9 @@ use crate::{ config::*, }; +use crate::pci::vpci_dev::VpciDevType; +use crate::pci_dev; + pub const BOARD_NAME: &str = "imx8mp"; pub const BOARD_NCPUS: usize = 4; @@ -107,14 +110,23 @@ pub const ROOT_ZONE_MEMORY_REGIONS: [HvConfigMemoryRegion; 8] = [ // virtual_start: 0x30890000, // size: 0x1000, // }, // serial + // 0x32f00000 + // HvConfigMemoryRegion { + // mem_type: MEM_TYPE_IO, + // physical_start: 0x32f00000, + // virtual_start: 0x32f00000, + // size: 0x10000, + // }, // pcie-phy ]; pub const IRQ_WAKEUP_VIRTIO_DEVICE: usize = 32 + 0x20; pub const ROOT_ZONE_IRQS_BITMAP: &[BitmapWord] = &get_irqs_bitmap(&[ 35, 36, 37, 38, 45, 52, 55, 56, 57, 59, 64, 67, 75, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 135, 150, 151, 152, 162, + 105, 135, 150, 151, 152, 162, 172, 159, ]); +pub const ROOT_ZONE_IVC_CONFIG: 
[HvIvcConfig; 0] = []; + pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { is_aarch32: 0, gic_config: GicConfig::Gicv3(Gicv3Config { @@ -127,4 +139,39 @@ pub const ROOT_ARCH_ZONE_CONFIG: HvArchZoneConfig = HvArchZoneConfig { }), }; -pub const ROOT_ZONE_IVC_CONFIG: [HvIvcConfig; 0] = []; +pub const ROOT_PCI_CONFIG: &[HvPciConfig] = &[HvPciConfig { + ecam_base: 0x33800000, + ecam_size: 0x400000, + io_base: 0x1ff80000, + io_size: 0x10000, + pci_io_base: 0x0, + mem32_base: 0x1800_0000, + mem32_size: 0x7f0_0000, + pci_mem32_base: 0x1800_0000, + mem64_base: 0x0, + mem64_size: 0x0, + pci_mem64_base: 0x0, + bus_range_begin: 0x0, + bus_range_end: 0x1f, + domain: 0x0, +}]; + +pub const ROOT_DWC_ATU_CONFIG: &[HvDwcAtuConfig] = &[HvDwcAtuConfig { + ecam_base: 0x33800000, + dbi_base: 0x33800000, + dbi_size: 0x400000, + apb_base: 0x0, + apb_size: 0x0, + cfg_base: 0x1ff00000, + cfg_size: 0x80000, + io_cfg_atu_shared: 1, + io_atu_index: 1, + dw_msi_irq: 172, +}]; + +pub const ROOT_PCI_DEVS: [HvPciDevConfig; 2] = [ + pci_dev!(0x0, 0x00, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x01, 0x0, 0x0 => 0x1, 0x0, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x02, 0x10, 0x0 => 0x02, 0x10, 0x0, VpciDevType::Physical), // SRIOV VF + // pci_dev!(0x0, 0x02, 0x10, 0x4 => 0x02, 0x10, 0x4, VpciDevType::Physical), // SRIOV VF +]; diff --git a/platform/aarch64/imx8mp/cargo/features b/platform/aarch64/imx8mp/cargo/features index 7acf44c6..e2b1a51a 100644 --- a/platform/aarch64/imx8mp/cargo/features +++ b/platform/aarch64/imx8mp/cargo/features @@ -1,2 +1,8 @@ gicv3 imx_uart +pci +dwc_pcie +no_pcie_bar_realloc +dwc_msi +pci_init_delay +sriov \ No newline at end of file diff --git a/platform/aarch64/imx8mp/configs/zone1-linux.json b/platform/aarch64/imx8mp/configs/zone1-linux.json index dd7d732a..086101e5 100644 --- a/platform/aarch64/imx8mp/configs/zone1-linux.json +++ b/platform/aarch64/imx8mp/configs/zone1-linux.json @@ -2,80 +2,154 @@ "arch": "arm64", 
"name": "linux2", "zone_id": 1, - "cpus": [2, 3], + "cpus": [ + 2, + 3 + ], "memory_regions": [ { "type": "ram", "physical_start": "0x50000000", - "virtual_start": "0x50000000", + "virtual_start": "0x50000000", "size": "0x30000000" }, { "type": "io", "physical_start": "0x30a60000", - "virtual_start": "0x30a60000", + "virtual_start": "0x30a60000", "size": "0x10000" }, { "type": "virtio", "physical_start": "0xa003c00", - "virtual_start": "0xa003c00", + "virtual_start": "0xa003c00", "size": "0x200" }, { "type": "virtio", "physical_start": "0xa003800", - "virtual_start": "0xa003800", + "virtual_start": "0xa003800", "size": "0x200" }, { "type": "virtio", "physical_start": "0xa003600", - "virtual_start": "0xa003600", + "virtual_start": "0xa003600", "size": "0x200" }, { "type": "io", "physical_start": "0x38000000", - "virtual_start": "0x38000000", + "virtual_start": "0x38000000", "size": "0x10000" }, { "type": "io", "physical_start": "0x38500000", - "virtual_start": "0x38500000", + "virtual_start": "0x38500000", "size": "0x20000" }, { "type": "io", "physical_start": "0x32e80000", - "virtual_start": "0x32e80000", + "virtual_start": "0x32e80000", "size": "0x20000" }, { "type": "io", "physical_start": "0x30c00000", - "virtual_start": "0x30c00000", + "virtual_start": "0x30c00000", "size": "0x400000" }, { "type": "io", "physical_start": "0x32fc0000", - "virtual_start": "0x32fc0000", + "virtual_start": "0x32fc0000", "size": "0x20000" } ], - "interrupts": [35, 37, 38, 45, 56, 57, 61, 75, 76, 78, 135, 162], + "interrupts": [ + 35, + 37, + 38, + 45, + 56, + 57, + 61, + 75, + 76, + 78, + 135, + 162, + 172 + ], "ivc_configs": [], "kernel_filepath": "./Image", "dtb_filepath": "./linux2.dtb", "kernel_load_paddr": "0x50400000", - "dtb_load_paddr": "0x50000000", - "entry_point": "0x50400000", + "dtb_load_paddr": "0x50000000", + "entry_point": "0x50400000", "arch_config": { + "gic_version": "v3", "gicd_base": "0x38800000", "gicd_size": "0x10000", "gicr_base": "0x38880000", - "gicr_size": 
"0xc0000" - } -} + "gicr_size": "0xc0000", + "is_aarch32": false + }, + "pci_config": [ + { + "ecam_base": "0x33800000", + "ecam_size": "0x400000", + "io_base": "0x1ff80000", + "io_size": "0x10000", + "pci_io_base": "0x0", + "mem32_base": "0x18000000", + "mem32_size": "0x7f00000", + "pci_mem32_base": "0x18000000", + "mem64_base": "0x0", + "mem64_size": "0x0", + "pci_mem64_base": "0x0", + "bus_range_begin": "0x0", + "bus_range_end": "0x1f", + "domain": "0x0" + } + ], + "dwc_atu_config": [ + { + "ecam_base": "0x33800000", + "dbi_base": "0x33800000", + "dbi_size": "0x400000", + "apb_base": "0x0", + "apb_size": "0x0", + "cfg_base": "0x1ff00000", + "cfg_size": "0x10000", + "io_cfg_atu_shared": 1, + "io_atu_index": 1, + "dw_msi_irq": 172 + } + ], + "num_pci_devs": 2, + "alloc_pci_devs": [ + { + "domain": "0x0", + "bus": "0x0", + "device": "0x0", + "function": "0x0", + "v_bus": "0x0", + "v_device": "0x0", + "v_function": "0x0", + "dev_type": "0" + }, + { + "domain": "0x0", + "bus": "0x1", + "device": "0x0", + "function": "0x1", + "v_bus": "0x1", + "v_device": "0x0", + "v_function": "0x0", + "dev_type": "0" + } + ] +} \ No newline at end of file diff --git a/platform/aarch64/qemu-gicv2/board.rs b/platform/aarch64/qemu-gicv2/board.rs index 26c0f715..920cefc8 100644 --- a/platform/aarch64/qemu-gicv2/board.rs +++ b/platform/aarch64/qemu-gicv2/board.rs @@ -119,7 +119,7 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [ pub const ROOT_ZONE_IVC_CONFIG: [HvIvcConfig; 0] = []; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 3] = [ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x2, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x1, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x2, 0x0 => 0x0, 0x2, 0x0, VpciDevType::Physical), ]; diff --git a/platform/aarch64/qemu-gicv3/board.rs 
b/platform/aarch64/qemu-gicv3/board.rs index 56c39f6c..b3e96b38 100644 --- a/platform/aarch64/qemu-gicv3/board.rs +++ b/platform/aarch64/qemu-gicv3/board.rs @@ -117,8 +117,8 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { pub const ROOT_ZONE_IVC_CONFIG: [HvIvcConfig; 0] = []; pub const ROOT_PCI_DEVS: &[HvPciDevConfig] = &[ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), - // pci_dev!(0x0, 0x0, 0x3, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x5, 0x0, VpciDevType::StandardVdev), + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x3, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x0, 0x3, 0x0 => 0x0, 0x3, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x0, 0x5, 0x0 => 0x0, 0x5, 0x0, VpciDevType::StandardVdev), ]; diff --git a/platform/aarch64/qemu-gicv3/configs/zone1-linux.json b/platform/aarch64/qemu-gicv3/configs/zone1-linux.json index b9b5d68d..514c16f9 100644 --- a/platform/aarch64/qemu-gicv3/configs/zone1-linux.json +++ b/platform/aarch64/qemu-gicv3/configs/zone1-linux.json @@ -1,12 +1,15 @@ { "name": "linux2", "zone_id": 1, - "cpus": [2, 3], + "cpus": [ + 2, + 3 + ], "memory_regions": [ { "type": "ram", "physical_start": "0x50000000", - "virtual_start": "0x50000000", + "virtual_start": "0x50000000", "size": "0x30000000" }, { @@ -37,8 +40,8 @@ "kernel_filepath": "./Image", "dtb_filepath": "./zone1-linux.dtb", "kernel_load_paddr": "0x50400000", - "dtb_load_paddr": "0x50000000", - "entry_point": "0x50400000", + "dtb_load_paddr": "0x50000000", + "entry_point": "0x50400000", "arch_config": { "gic_version": "v3", "gicd_base": "0x8000000", @@ -49,22 +52,24 @@ "gits_size": "0x20000", "is_aarch32": false }, - "pci_config": [{ - "ecam_base": "0x4010000000", - "ecam_size": "0x10000000", - "io_base": "0x3eff0000", - "io_size": "0x10000", - "pci_io_base": "0x0", - "mem32_base": "0x10000000", - "mem32_size": "0x2eff0000", - 
"pci_mem32_base": "0x10000000", - "mem64_base": "0x8000000000", - "mem64_size": "0x8000000000", - "pci_mem64_base": "0x8000000000", - "bus_range_begin": "0x0", - "bus_range_end": "0x1f", - "domain": "0x0" - }], + "pci_config": [ + { + "ecam_base": "0x4010000000", + "ecam_size": "0x10000000", + "io_base": "0x3eff0000", + "io_size": "0x10000", + "pci_io_base": "0x0", + "mem32_base": "0x10000000", + "mem32_size": "0x2eff0000", + "pci_mem32_base": "0x10000000", + "mem64_base": "0x8000000000", + "mem64_size": "0x8000000000", + "pci_mem64_base": "0x8000000000", + "bus_range_begin": "0x0", + "bus_range_end": "0x1f", + "domain": "0x0" + } + ], "num_pci_devs": 2, "alloc_pci_devs": [ { @@ -72,13 +77,19 @@ "bus": "0x0", "device": "0x0", "function": "0x0", + "v_bus": "0x0", + "v_device": "0x0", + "v_function": "0x0", "dev_type": "0" }, { "domain": "0x0", "bus": "0x0", - "device": "0x1", + "device": "0x2", "function": "0x0", + "v_bus": "0x0", + "v_device": "0x1", + "v_function": "0x0", "dev_type": "0" } ] diff --git a/platform/aarch64/rk3568/board.rs b/platform/aarch64/rk3568/board.rs index a7618b52..6b3182d6 100644 --- a/platform/aarch64/rk3568/board.rs +++ b/platform/aarch64/rk3568/board.rs @@ -354,9 +354,11 @@ pub const ROOT_DWC_ATU_CONFIG: &[HvDwcAtuConfig] = &[HvDwcAtuConfig { cfg_base: 0xf2000000, cfg_size: 0x80000 * 2, io_cfg_atu_shared: 0, + io_atu_index: 0, + dw_msi_irq: 0, }]; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 2] = [ - pci_dev!(0x0, 0x00, 0x0, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x01, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x00, 0x0, 0x0 => 0x00, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x01, 0x0, 0x0 => 0x01, 0x0, 0x0, VpciDevType::Physical), ]; diff --git a/platform/aarch64/rk3588/board.rs b/platform/aarch64/rk3588/board.rs index 4634691a..74184116 100644 --- a/platform/aarch64/rk3588/board.rs +++ b/platform/aarch64/rk3588/board.rs @@ -215,6 +215,6 @@ pub const ROOT_PCI_CONFIG: HvPciConfig = HvPciConfig { pub const 
ROOT_ZONE_IVC_CONFIG: [HvIvcConfig; 0] = []; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 2] = [ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x1, 0x0, VpciDevType::Physical), ]; diff --git a/platform/loongarch64/ls3a5000/board.rs b/platform/loongarch64/ls3a5000/board.rs index 541d73a8..0b1f1390 100644 --- a/platform/loongarch64/ls3a5000/board.rs +++ b/platform/loongarch64/ls3a5000/board.rs @@ -183,32 +183,32 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { /* BUS 6 on X4 slot */ /* 06:00.0, 06:00.1, 06:00.2, 06:00.3 net */ pub const ROOT_PCI_DEVS: &[HvPciDevConfig] = &[ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), // 00:00.0 - pci_dev!(0x0, 0x0, 0x0, 0x1, VpciDevType::Physical), // 00:00.1 - pci_dev!(0x0, 0x0, 0x0, 0x2, VpciDevType::Physical), // 00:00.2 - pci_dev!(0x0, 0x0, 0x0, 0x3, VpciDevType::Physical), // 00:00.3 - pci_dev!(0x0, 0x0, 0x4, 0x0, VpciDevType::Physical), // 00:04.0 - pci_dev!(0x0, 0x0, 0x4, 0x1, VpciDevType::Physical), // 00:04.1 - pci_dev!(0x0, 0x0, 0x5, 0x0, VpciDevType::Physical), // 00:05.0 - pci_dev!(0x0, 0x0, 0x5, 0x1, VpciDevType::Physical), // 00:05.1 - // pci_dev!(0x0, 0x0, 0x6, 0x0, VpciDevType::Physical), // 00:06.0 - // pci_dev!(0x0, 0x0, 0x6, 0x1, VpciDevType::Physical), // 00:06.1 - // pci_dev!(0x0, 0x0, 0x6, 0x2, VpciDevType::Physical), // 00:06.2 - pci_dev!(0x0, 0x0, 0x7, 0x0, VpciDevType::Physical), // 00:07.0 - pci_dev!(0x0, 0x0, 0x8, 0x0, VpciDevType::Physical), // 00:08.0 - pci_dev!(0x0, 0x0, 0x9, 0x0, VpciDevType::Physical), // 00:09.0 - pci_dev!(0x0, 0x0, 0xa, 0x0, VpciDevType::Physical), // 00:0a.0 - pci_dev!(0x0, 0x0, 0xb, 0x0, VpciDevType::Physical), // 00:0b.0 - pci_dev!(0x0, 0x0, 0xc, 0x0, VpciDevType::Physical), // 00:0c.0 - pci_dev!(0x0, 0x0, 0xd, 0x0, VpciDevType::Physical), // 00:0d.0 - pci_dev!(0x0, 0x0, 0xf, 0x0, 
VpciDevType::Physical), // 00:0f.0 - pci_dev!(0x0, 0x0, 0x10, 0x0, VpciDevType::Physical), // 00:10.0 - pci_dev!(0x0, 0x0, 0x13, 0x0, VpciDevType::Physical), // 00:13.0 - pci_dev!(0x0, 0x0, 0x16, 0x0, VpciDevType::Physical), // 00:16.0 - pci_dev!(0x0, 0x0, 0x19, 0x0, VpciDevType::Physical), // 00:19.0 - pci_dev!(0x0, 0x2, 0x0, 0x0, VpciDevType::Physical), // 02:00.0 - pci_dev!(0x0, 0x5, 0x0, 0x0, VpciDevType::Physical), // 05:00.0 - pci_dev!(0x0, 0x6, 0x0, 0x0, VpciDevType::Physical), // 06:00.0 + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), // 00:00.0 + pci_dev!(0x0, 0x0, 0x0, 0x1 => 0x0, 0x0, 0x1, VpciDevType::Physical), // 00:00.1 + pci_dev!(0x0, 0x0, 0x0, 0x2 => 0x0, 0x0, 0x2, VpciDevType::Physical), // 00:00.2 + pci_dev!(0x0, 0x0, 0x0, 0x3 => 0x0, 0x0, 0x3, VpciDevType::Physical), // 00:00.3 + pci_dev!(0x0, 0x0, 0x4, 0x0 => 0x0, 0x4, 0x0, VpciDevType::Physical), // 00:04.0 + pci_dev!(0x0, 0x0, 0x4, 0x1 => 0x0, 0x4, 0x1, VpciDevType::Physical), // 00:04.1 + pci_dev!(0x0, 0x0, 0x5, 0x0 => 0x0, 0x5, 0x0, VpciDevType::Physical), // 00:05.0 + pci_dev!(0x0, 0x0, 0x5, 0x1 => 0x0, 0x5, 0x1, VpciDevType::Physical), // 00:05.1 + // pci_dev!(0x0, 0x0, 0x6, 0x0 => 0x0, 0x6, 0x0, VpciDevType::Physical), // 00:06.0 + // pci_dev!(0x0, 0x0, 0x6, 0x1 => 0x0, 0x6, 0x1, VpciDevType::Physical), // 00:06.1 + // pci_dev!(0x0, 0x0, 0x6, 0x2 => 0x0, 0x6, 0x2, VpciDevType::Physical), // 00:06.2 + pci_dev!(0x0, 0x0, 0x7, 0x0 => 0x0, 0x7, 0x0, VpciDevType::Physical), // 00:07.0 + pci_dev!(0x0, 0x0, 0x8, 0x0 => 0x0, 0x8, 0x0, VpciDevType::Physical), // 00:08.0 + pci_dev!(0x0, 0x0, 0x9, 0x0 => 0x0, 0x9, 0x0, VpciDevType::Physical), // 00:09.0 + pci_dev!(0x0, 0x0, 0xa, 0x0 => 0x0, 0xa, 0x0, VpciDevType::Physical), // 00:0a.0 + pci_dev!(0x0, 0x0, 0xb, 0x0 => 0x0, 0xb, 0x0, VpciDevType::Physical), // 00:0b.0 + pci_dev!(0x0, 0x0, 0xc, 0x0 => 0x0, 0xc, 0x0, VpciDevType::Physical), // 00:0c.0 + pci_dev!(0x0, 0x0, 0xd, 0x0 => 0x0, 0xd, 0x0, VpciDevType::Physical), // 
00:0d.0 + pci_dev!(0x0, 0x0, 0xf, 0x0 => 0x0, 0xf, 0x0, VpciDevType::Physical), // 00:0f.0 + pci_dev!(0x0, 0x0, 0x10, 0x0 => 0x0, 0x10, 0x0, VpciDevType::Physical), // 00:10.0 + pci_dev!(0x0, 0x0, 0x13, 0x0 => 0x0, 0x13, 0x0, VpciDevType::Physical), // 00:13.0 + pci_dev!(0x0, 0x0, 0x16, 0x0 => 0x0, 0x16, 0x0, VpciDevType::Physical), // 00:16.0 + pci_dev!(0x0, 0x0, 0x19, 0x0 => 0x0, 0x19, 0x0, VpciDevType::Physical), // 00:19.0 + pci_dev!(0x0, 0x2, 0x0, 0x0 => 0x2, 0x0, 0x0, VpciDevType::Physical), // 02:00.0 + pci_dev!(0x0, 0x5, 0x0, 0x0 => 0x5, 0x0, 0x0, VpciDevType::Physical), // 05:00.0 + pci_dev!(0x0, 0x6, 0x0, 0x0 => 0x6, 0x0, 0x0, VpciDevType::Physical), // 06:00.0 ]; // bus << 8 | dev << 5 | func << 3 diff --git a/platform/loongarch64/ls3a5000/configs/zone1-linux.json b/platform/loongarch64/ls3a5000/configs/zone1-linux.json index cbfbf972..1a453d34 100644 --- a/platform/loongarch64/ls3a5000/configs/zone1-linux.json +++ b/platform/loongarch64/ls3a5000/configs/zone1-linux.json @@ -111,28 +111,35 @@ "arch_config": { "dummy": "0x1234" }, - "pci_config": [{ - "ecam_base": "0xfe00000000", - "ecam_size": "0x20000000", - "io_base": "0x18408000", - "io_size": "0x8000", - "pci_io_base": "0x00008000", - "mem32_base": "0x0", - "mem32_size": "0x0", - "pci_mem32_base": "0x0", - "mem64_base": "0x60000000", - "mem64_size": "0x20000000", - "pci_mem64_base": "0x60000000", - "bus_range_begin": "0x0", - "bus_range_end": "0x1f", - "domain": "0x0" - }], + "pci_config": [ + { + "ecam_base": "0xfe00000000", + "ecam_size": "0x20000000", + "io_base": "0x18408000", + "io_size": "0x8000", + "pci_io_base": "0x00008000", + "mem32_base": "0x0", + "mem32_size": "0x0", + "pci_mem32_base": "0x0", + "mem64_base": "0x60000000", + "mem64_size": "0x20000000", + "pci_mem64_base": "0x60000000", + "bus_range_begin": "0x0", + "bus_range_end": "0x1f", + "domain": "0x0" + } + ], "num_pci_devs": 1, - "alloc_pci_devs": [{ - "domain": "0x0", - "bus": "0x6", - "device": "0x1", - "function": "0x0", - 
"dev_type": "0" - }] + "alloc_pci_devs": [ + { + "domain": "0x0", + "bus": "0x6", + "device": "0x1", + "function": "0x0", + "v_bus": "0x6", + "v_device": "0x1", + "v_function": "0x0", + "dev_type": "0" + } + ] } \ No newline at end of file diff --git a/platform/loongarch64/ls3a5000/configs/zone2-linux.json b/platform/loongarch64/ls3a5000/configs/zone2-linux.json index 54ed3e2b..3c5d73f5 100644 --- a/platform/loongarch64/ls3a5000/configs/zone2-linux.json +++ b/platform/loongarch64/ls3a5000/configs/zone2-linux.json @@ -99,28 +99,35 @@ "arch_config": { "dummy": "0x1234" }, - "pci_config": [{ - "ecam_base": "0xfe00000000", - "ecam_size": "0x20000000", - "io_base": "0x18408000", - "io_size": "0x8000", - "pci_io_base": "0x00008000", - "mem32_base": "0x0", - "mem32_size": "0x0", - "pci_mem32_base": "0x0", - "mem64_base": "0x60000000", - "mem64_size": "0x20000000", - "pci_mem64_base": "0x60000000", - "bus_range_begin": "0x0", - "bus_range_end": "0x1f", - "domain": "0x0" - }], + "pci_config": [ + { + "ecam_base": "0xfe00000000", + "ecam_size": "0x20000000", + "io_base": "0x18408000", + "io_size": "0x8000", + "pci_io_base": "0x00008000", + "mem32_base": "0x0", + "mem32_size": "0x0", + "pci_mem32_base": "0x0", + "mem64_base": "0x60000000", + "mem64_size": "0x20000000", + "pci_mem64_base": "0x60000000", + "bus_range_begin": "0x0", + "bus_range_end": "0x1f", + "domain": "0x0" + } + ], "num_pci_devs": 1, - "alloc_pci_devs": [{ - "domain": "0x0", - "bus": "0x6", - "device": "0x2", - "function": "0x0", - "dev_type": "0" - }] + "alloc_pci_devs": [ + { + "domain": "0x0", + "bus": "0x6", + "device": "0x2", + "function": "0x0", + "v_bus": "0x6", + "v_device": "0x2", + "v_function": "0x0", + "dev_type": "0" + } + ] } \ No newline at end of file diff --git a/platform/loongarch64/ls3a5000/configs/zone3-linux.json b/platform/loongarch64/ls3a5000/configs/zone3-linux.json index 4f8e39b7..07811b89 100644 --- a/platform/loongarch64/ls3a5000/configs/zone3-linux.json +++ 
b/platform/loongarch64/ls3a5000/configs/zone3-linux.json @@ -99,28 +99,35 @@ "arch_config": { "dummy": "0x1234" }, - "pci_config": [{ - "ecam_base": "0xfe00000000", - "ecam_size": "0x20000000", - "io_base": "0x18408000", - "io_size": "0x8000", - "pci_io_base": "0x00008000", - "mem32_base": "0x0", - "mem32_size": "0x0", - "pci_mem32_base": "0x0", - "mem64_base": "0x60000000", - "mem64_size": "0x20000000", - "pci_mem64_base": "0x60000000", - "bus_range_begin": "0x0", - "bus_range_end": "0x1f", - "domain": "0x0" - }], + "pci_config": [ + { + "ecam_base": "0xfe00000000", + "ecam_size": "0x20000000", + "io_base": "0x18408000", + "io_size": "0x8000", + "pci_io_base": "0x00008000", + "mem32_base": "0x0", + "mem32_size": "0x0", + "pci_mem32_base": "0x0", + "mem64_base": "0x60000000", + "mem64_size": "0x20000000", + "pci_mem64_base": "0x60000000", + "bus_range_begin": "0x0", + "bus_range_end": "0x1f", + "domain": "0x0" + } + ], "num_pci_devs": 1, - "alloc_pci_devs": [{ - "domain": "0x0", - "bus": "0x6", - "device": "0x3", - "function": "0x0", - "dev_type": "0" - }] + "alloc_pci_devs": [ + { + "domain": "0x0", + "bus": "0x6", + "device": "0x3", + "function": "0x0", + "v_bus": "0x6", + "v_device": "0x3", + "v_function": "0x0", + "dev_type": "0" + } + ] } \ No newline at end of file diff --git a/platform/loongarch64/ls3a6000/board.rs b/platform/loongarch64/ls3a6000/board.rs index 6bd74afa..fdc6f018 100644 --- a/platform/loongarch64/ls3a6000/board.rs +++ b/platform/loongarch64/ls3a6000/board.rs @@ -183,32 +183,32 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { /* BUS 6 on X4 slot */ /* 06:00.0, 06:00.1, 06:00.2, 06:00.3 net */ pub const ROOT_PCI_DEVS: [HvPciDevConfig; 26] = [ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), // 00:00.0 - pci_dev!(0x0, 0x0, 0x0, 0x1, VpciDevType::Physical), // 00:00.1 - pci_dev!(0x0, 0x0, 0x0, 0x2, VpciDevType::Physical), // 00:00.2 - pci_dev!(0x0, 0x0, 0x0, 0x3, VpciDevType::Physical), // 00:00.3 - pci_dev!(0x0, 0x0, 0x4, 
0x0, VpciDevType::Physical), // 00:04.0 - pci_dev!(0x0, 0x0, 0x4, 0x1, VpciDevType::Physical), // 00:04.1 - pci_dev!(0x0, 0x0, 0x5, 0x0, VpciDevType::Physical), // 00:05.0 - pci_dev!(0x0, 0x0, 0x5, 0x1, VpciDevType::Physical), // 00:05.1 - pci_dev!(0x0, 0x0, 0x6, 0x0, VpciDevType::Physical), // 00:06.0 - pci_dev!(0x0, 0x0, 0x6, 0x1, VpciDevType::Physical), // 00:06.1 - pci_dev!(0x0, 0x0, 0x6, 0x2, VpciDevType::Physical), // 00:06.2 - pci_dev!(0x0, 0x0, 0x7, 0x0, VpciDevType::Physical), // 00:07.0 - pci_dev!(0x0, 0x0, 0x8, 0x0, VpciDevType::Physical), // 00:08.0 - pci_dev!(0x0, 0x0, 0x9, 0x0, VpciDevType::Physical), // 00:09.0 - pci_dev!(0x0, 0x0, 0xa, 0x0, VpciDevType::Physical), // 00:0a.0 - pci_dev!(0x0, 0x0, 0xb, 0x0, VpciDevType::Physical), // 00:0b.0 - pci_dev!(0x0, 0x0, 0xc, 0x0, VpciDevType::Physical), // 00:0c.0 - pci_dev!(0x0, 0x0, 0xd, 0x0, VpciDevType::Physical), // 00:0d.0 - pci_dev!(0x0, 0x0, 0xf, 0x0, VpciDevType::Physical), // 00:0f.0 - pci_dev!(0x0, 0x0, 0x10, 0x0, VpciDevType::Physical), // 00:10.0 - pci_dev!(0x0, 0x0, 0x13, 0x0, VpciDevType::Physical), // 00:13.0 - pci_dev!(0x0, 0x0, 0x16, 0x0, VpciDevType::Physical), // 00:16.0 - pci_dev!(0x0, 0x0, 0x19, 0x0, VpciDevType::Physical), // 00:19.0 - pci_dev!(0x0, 0x2, 0x0, 0x0, VpciDevType::Physical), // 02:00.0 - pci_dev!(0x0, 0x5, 0x0, 0x0, VpciDevType::Physical), // 05:00.0 - pci_dev!(0x0, 0x6, 0x0, 0x0, VpciDevType::Physical), // 06:00.0 + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), // 00:00.0 + pci_dev!(0x0, 0x0, 0x0, 0x1 => 0x0, 0x0, 0x1, VpciDevType::Physical), // 00:00.1 + pci_dev!(0x0, 0x0, 0x0, 0x2 => 0x0, 0x0, 0x2, VpciDevType::Physical), // 00:00.2 + pci_dev!(0x0, 0x0, 0x0, 0x3 => 0x0, 0x0, 0x3, VpciDevType::Physical), // 00:00.3 + pci_dev!(0x0, 0x0, 0x4, 0x0 => 0x0, 0x4, 0x0, VpciDevType::Physical), // 00:04.0 + pci_dev!(0x0, 0x0, 0x4, 0x1 => 0x0, 0x4, 0x1, VpciDevType::Physical), // 00:04.1 + pci_dev!(0x0, 0x0, 0x5, 0x0 => 0x0, 0x5, 0x0, VpciDevType::Physical), 
// 00:05.0 + pci_dev!(0x0, 0x0, 0x5, 0x1 => 0x0, 0x5, 0x1, VpciDevType::Physical), // 00:05.1 + pci_dev!(0x0, 0x0, 0x6, 0x0 => 0x0, 0x6, 0x0, VpciDevType::Physical), // 00:06.0 + pci_dev!(0x0, 0x0, 0x6, 0x1 => 0x0, 0x6, 0x1, VpciDevType::Physical), // 00:06.1 + pci_dev!(0x0, 0x0, 0x6, 0x2 => 0x0, 0x6, 0x2, VpciDevType::Physical), // 00:06.2 + pci_dev!(0x0, 0x0, 0x7, 0x0 => 0x0, 0x7, 0x0, VpciDevType::Physical), // 00:07.0 + pci_dev!(0x0, 0x0, 0x8, 0x0 => 0x0, 0x8, 0x0, VpciDevType::Physical), // 00:08.0 + pci_dev!(0x0, 0x0, 0x9, 0x0 => 0x0, 0x9, 0x0, VpciDevType::Physical), // 00:09.0 + pci_dev!(0x0, 0x0, 0xa, 0x0 => 0x0, 0xa, 0x0, VpciDevType::Physical), // 00:0a.0 + pci_dev!(0x0, 0x0, 0xb, 0x0 => 0x0, 0xb, 0x0, VpciDevType::Physical), // 00:0b.0 + pci_dev!(0x0, 0x0, 0xc, 0x0 => 0x0, 0xc, 0x0, VpciDevType::Physical), // 00:0c.0 + pci_dev!(0x0, 0x0, 0xd, 0x0 => 0x0, 0xd, 0x0, VpciDevType::Physical), // 00:0d.0 + pci_dev!(0x0, 0x0, 0xf, 0x0 => 0x0, 0xf, 0x0, VpciDevType::Physical), // 00:0f.0 + pci_dev!(0x0, 0x0, 0x10, 0x0 => 0x0, 0x10, 0x0, VpciDevType::Physical), // 00:10.0 + pci_dev!(0x0, 0x0, 0x13, 0x0 => 0x0, 0x13, 0x0, VpciDevType::Physical), // 00:13.0 + pci_dev!(0x0, 0x0, 0x16, 0x0 => 0x0, 0x16, 0x0, VpciDevType::Physical), // 00:16.0 + pci_dev!(0x0, 0x0, 0x19, 0x0 => 0x0, 0x19, 0x0, VpciDevType::Physical), // 00:19.0 + pci_dev!(0x0, 0x2, 0x0, 0x0 => 0x2, 0x0, 0x0, VpciDevType::Physical), // 02:00.0 + pci_dev!(0x0, 0x5, 0x0, 0x0 => 0x5, 0x0, 0x0, VpciDevType::Physical), // 05:00.0 + pci_dev!(0x0, 0x6, 0x0, 0x0 => 0x6, 0x0, 0x0, VpciDevType::Physical), // 06:00.0 ]; // bus << 8 | dev << 5 | func << 3 diff --git a/platform/riscv64/qemu-aia/board.rs b/platform/riscv64/qemu-aia/board.rs index bcfa1d78..7880cd9f 100644 --- a/platform/riscv64/qemu-aia/board.rs +++ b/platform/riscv64/qemu-aia/board.rs @@ -93,8 +93,8 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { pub const ROOT_ZONE_IVC_CONFIG: &[HvIvcConfig] = &[]; pub const ROOT_PCI_DEVS: 
&[HvPciDevConfig] = &[ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), - // pci_dev!(0x0, 0x0, 0x3, 0x0, VpciDevType::Physical), - // pci_dev!(0x0, 0x0, 0x5, 0x0, VpciDevType::StandardVdev), + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x1, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x0, 0x3, 0x0 => 0x0, 0x3, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x0, 0x5, 0x0 => 0x0, 0x5, 0x0, VpciDevType::StandardVdev), ]; diff --git a/platform/riscv64/qemu-plic/board.rs b/platform/riscv64/qemu-plic/board.rs index 6622c121..84c7dbcc 100644 --- a/platform/riscv64/qemu-plic/board.rs +++ b/platform/riscv64/qemu-plic/board.rs @@ -104,8 +104,8 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { pub const ROOT_ZONE_IVC_CONFIG: &[HvIvcConfig] = &[]; pub const ROOT_PCI_DEVS: &[HvPciDevConfig] = &[ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), - // pci_dev!(0x0, 0x0, 0x3, 0x0, VpciDevType::Physical), - // pci_dev!(0x0, 0x0, 0x5, 0x0, VpciDevType::StandardVdev), + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x1, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x0, 0x3, 0x0 => 0x0, 0x3, 0x0, VpciDevType::Physical), + // pci_dev!(0x0, 0x0, 0x5, 0x0 => 0x0, 0x5, 0x0, VpciDevType::StandardVdev), ]; diff --git a/platform/x86_64/ecx-2300f-peg/board.rs b/platform/x86_64/ecx-2300f-peg/board.rs index 1b295342..e2f35550 100644 --- a/platform/x86_64/ecx-2300f-peg/board.rs +++ b/platform/x86_64/ecx-2300f-peg/board.rs @@ -156,28 +156,28 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { pub const ROOT_PCI_MAX_BUS: usize = 3; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 19] = [ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), // host bridge - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), // PCI bridge - 
pci_dev!(0x0, 0x0, 0x1, 0x1, VpciDevType::Physical), // PCI bridge - // pci_dev!(0x0, 0x0, 0x2, 0x0, VpciDevType::Physical), // display controller - pci_dev!(0x0, 0x0, 0x8, 0x0, VpciDevType::Physical), // system peripheral - pci_dev!(0x0, 0x0, 0x12, 0x0, VpciDevType::Physical), // signal processing controller - pci_dev!(0x0, 0x0, 0x14, 0x0, VpciDevType::Physical), // USB controller - pci_dev!(0x0, 0x0, 0x14, 0x2, VpciDevType::Physical), // RAM memory - pci_dev!(0x0, 0x0, 0x14, 0x5, VpciDevType::Physical), // SD host controller - pci_dev!(0x0, 0x0, 0x15, 0x0, VpciDevType::Physical), // serial bus controller - pci_dev!(0x0, 0x0, 0x16, 0x0, VpciDevType::Physical), // communication controller - pci_dev!(0x0, 0x0, 0x16, 0x3, VpciDevType::Physical), // serial controller - pci_dev!(0x0, 0x0, 0x17, 0x0, VpciDevType::Physical), // SATA controller - pci_dev!(0x0, 0x0, 0x1d, 0x0, VpciDevType::Physical), // PCI bridge - // pci_dev!(0x0, 0x0, 0x1f, 0x0, VpciDevType::Physical), // ISA bridge - pci_dev!(0x0, 0x0, 0x1f, 0x3, VpciDevType::Physical), // audio device - pci_dev!(0x0, 0x0, 0x1f, 0x4, VpciDevType::Physical), // SMBus - pci_dev!(0x0, 0x0, 0x1f, 0x5, VpciDevType::Physical), // serial bus controller - // pci_dev!(0x0, 0x0, 0x1f, 0x6, VpciDevType::Physical), // ethernet controller - pci_dev!(0x0, 0x2, 0x0, 0x0, VpciDevType::Physical), // VGA controller - pci_dev!(0x0, 0x2, 0x0, 0x1, VpciDevType::Physical), // audio device - pci_dev!(0x0, 0x3, 0x0, 0x0, VpciDevType::Physical), // ethernet controller + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), // host bridge + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x1, 0x0, VpciDevType::Physical), // PCI bridge + pci_dev!(0x0, 0x0, 0x1, 0x1 => 0x0, 0x1, 0x1, VpciDevType::Physical), // PCI bridge + // pci_dev!(0x0, 0x0, 0x2, 0x0 => 0x0, 0x2, 0x0, VpciDevType::Physical), // display controller + pci_dev!(0x0, 0x0, 0x8, 0x0 => 0x0, 0x8, 0x0, VpciDevType::Physical), // system peripheral + pci_dev!(0x0, 0x0, 0x12, 0x0 => 
0x0, 0x12, 0x0, VpciDevType::Physical), // signal processing controller + pci_dev!(0x0, 0x0, 0x14, 0x0 => 0x0, 0x14, 0x0, VpciDevType::Physical), // USB controller + pci_dev!(0x0, 0x0, 0x14, 0x2 => 0x0, 0x14, 0x2, VpciDevType::Physical), // RAM memory + pci_dev!(0x0, 0x0, 0x14, 0x5 => 0x0, 0x14, 0x5, VpciDevType::Physical), // SD host controller + pci_dev!(0x0, 0x0, 0x15, 0x0 => 0x0, 0x15, 0x0, VpciDevType::Physical), // serial bus controller + pci_dev!(0x0, 0x0, 0x16, 0x0 => 0x0, 0x16, 0x0, VpciDevType::Physical), // communication controller + pci_dev!(0x0, 0x0, 0x16, 0x3 => 0x0, 0x16, 0x3, VpciDevType::Physical), // serial controller + pci_dev!(0x0, 0x0, 0x17, 0x0 => 0x0, 0x17, 0x0, VpciDevType::Physical), // SATA controller + pci_dev!(0x0, 0x0, 0x1d, 0x0 => 0x0, 0x1d, 0x0, VpciDevType::Physical), // PCI bridge + // pci_dev!(0x0, 0x0, 0x1f, 0x0 => 0x0, 0x1f, 0x0, VpciDevType::Physical), // ISA bridge + pci_dev!(0x0, 0x0, 0x1f, 0x3 => 0x0, 0x1f, 0x3, VpciDevType::Physical), // audio device + pci_dev!(0x0, 0x0, 0x1f, 0x4 => 0x0, 0x1f, 0x4, VpciDevType::Physical), // SMBus + pci_dev!(0x0, 0x0, 0x1f, 0x5 => 0x0, 0x1f, 0x5, VpciDevType::Physical), // serial bus controller + // pci_dev!(0x0, 0x0, 0x1f, 0x6 => 0x0, 0x1f, 0x6, VpciDevType::Physical), // ethernet controller + pci_dev!(0x0, 0x2, 0x0, 0x0 => 0x2, 0x0, 0x0, VpciDevType::Physical), // VGA controller + pci_dev!(0x0, 0x2, 0x0, 0x1 => 0x2, 0x0, 0x1, VpciDevType::Physical), // audio device + pci_dev!(0x0, 0x3, 0x0, 0x0 => 0x3, 0x0, 0x0, VpciDevType::Physical), // ethernet controller ]; #[cfg(all(feature = "graphics"))] diff --git a/platform/x86_64/nuc14mnk/board.rs b/platform/x86_64/nuc14mnk/board.rs index bceb2bfe..93d4149c 100644 --- a/platform/x86_64/nuc14mnk/board.rs +++ b/platform/x86_64/nuc14mnk/board.rs @@ -179,25 +179,25 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { pub const ROOT_PCI_MAX_BUS: usize = 2; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 18] = [ - pci_dev!(0x0, 0x0, 0x0, 0x0, 
VpciDevType::Physical), // host bridge - pci_dev!(0x0, 0x0, 0x2, 0x0, VpciDevType::Physical), // VGA controller - pci_dev!(0x0, 0x0, 0x4, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0x8, 0x0, VpciDevType::Physical), - pci_dev!(0x0, 0x0, 0xa, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), // host bridge + pci_dev!(0x0, 0x0, 0x2, 0x0 => 0x0, 0x2, 0x0, VpciDevType::Physical), // VGA controller + pci_dev!(0x0, 0x0, 0x4, 0x0 => 0x0, 0x4, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0x8, 0x0 => 0x0, 0x8, 0x0, VpciDevType::Physical), + pci_dev!(0x0, 0x0, 0xa, 0x0 => 0x0, 0xa, 0x0, VpciDevType::Physical), // pci_dev!(0x0, 0x0, 0xd, 0x0), // USB controller - pci_dev!(0x0, 0x0, 0x12, 0x0, VpciDevType::Physical), // serial controller - pci_dev!(0x0, 0x0, 0x14, 0x0, VpciDevType::Physical), // USB controller - pci_dev!(0x0, 0x0, 0x14, 0x2, VpciDevType::Physical), // RAM memory - pci_dev!(0x0, 0x0, 0x14, 0x3, VpciDevType::Physical), // network controller - pci_dev!(0x0, 0x0, 0x16, 0x0, VpciDevType::Physical), // communication controller - pci_dev!(0x0, 0x0, 0x1c, 0x0, VpciDevType::Physical), // PCI bridge - pci_dev!(0x0, 0x0, 0x1d, 0x0, VpciDevType::Physical), // PCI bridge - pci_dev!(0x0, 0x0, 0x1f, 0x0, VpciDevType::Physical), // ISA bridge - pci_dev!(0x0, 0x0, 0x1f, 0x3, VpciDevType::Physical), // audio controller - pci_dev!(0x0, 0x0, 0x1f, 0x4, VpciDevType::Physical), // SMBus - pci_dev!(0x0, 0x0, 0x1f, 0x5, VpciDevType::Physical), // serial bus controller - pci_dev!(0x0, 0x1, 0x0, 0x0, VpciDevType::Physical), // ethernet controller - pci_dev!(0x0, 0x2, 0x0, 0x0, VpciDevType::Physical), // memory controller + pci_dev!(0x0, 0x0, 0x12, 0x0 => 0x0, 0x12, 0x0, VpciDevType::Physical), // serial controller + pci_dev!(0x0, 0x0, 0x14, 0x0 => 0x0, 0x14, 0x0, VpciDevType::Physical), // USB controller + pci_dev!(0x0, 0x0, 0x14, 0x2 => 0x0, 0x14, 0x2, VpciDevType::Physical), // RAM memory + pci_dev!(0x0, 0x0, 0x14, 0x3 => 
0x0, 0x14, 0x3, VpciDevType::Physical), // network controller + pci_dev!(0x0, 0x0, 0x16, 0x0 => 0x0, 0x16, 0x0, VpciDevType::Physical), // communication controller + pci_dev!(0x0, 0x0, 0x1c, 0x0 => 0x0, 0x1c, 0x0, VpciDevType::Physical), // PCI bridge + pci_dev!(0x0, 0x0, 0x1d, 0x0 => 0x0, 0x1d, 0x0, VpciDevType::Physical), // PCI bridge + pci_dev!(0x0, 0x0, 0x1f, 0x0 => 0x0, 0x1f, 0x0, VpciDevType::Physical), // ISA bridge + pci_dev!(0x0, 0x0, 0x1f, 0x3 => 0x0, 0x1f, 0x3, VpciDevType::Physical), // audio controller + pci_dev!(0x0, 0x0, 0x1f, 0x4 => 0x0, 0x1f, 0x4, VpciDevType::Physical), // SMBus + pci_dev!(0x0, 0x0, 0x1f, 0x5 => 0x0, 0x1f, 0x5, VpciDevType::Physical), // serial bus controller + pci_dev!(0x0, 0x1, 0x0, 0x0 => 0x1, 0x0, 0x0, VpciDevType::Physical), // ethernet controller + pci_dev!(0x0, 0x2, 0x0, 0x0 => 0x2, 0x0, 0x0, VpciDevType::Physical), // memory controller ]; #[cfg(all(feature = "graphics"))] diff --git a/platform/x86_64/qemu/board.rs b/platform/x86_64/qemu/board.rs index a0a420b5..a5cd3ad8 100644 --- a/platform/x86_64/qemu/board.rs +++ b/platform/x86_64/qemu/board.rs @@ -150,14 +150,14 @@ pub const ROOT_PCI_CONFIG: [HvPciConfig; 1] = [HvPciConfig { pub const ROOT_PCI_MAX_BUS: usize = 1; pub const ROOT_PCI_DEVS: [HvPciDevConfig; 7] = [ - pci_dev!(0x0, 0x0, 0x0, 0x0, VpciDevType::Physical), // host bridge - pci_dev!(0x0, 0x0, 0x1, 0x0, VpciDevType::Physical), // VGA controller - pci_dev!(0x0, 0x0, 0x2, 0x0, VpciDevType::Physical), // Ethernet controller - pci_dev!(0x0, 0x0, 0x3, 0x0, VpciDevType::Physical), // PCI bridge - pci_dev!(0x0, 0x0, 0x1f, 0x0, VpciDevType::Physical), // ISA bridge - pci_dev!(0x0, 0x0, 0x1f, 0x2, VpciDevType::Physical), // SATA controller - // pci_dev!(0x0, 0x0, 0x1f, 0x3, VpciDevType::Physical), // SMBus - pci_dev!(0x0, 0x1, 0x0, 0x0, VpciDevType::Physical), // SCSI controller + pci_dev!(0x0, 0x0, 0x0, 0x0 => 0x0, 0x0, 0x0, VpciDevType::Physical), // host bridge + pci_dev!(0x0, 0x0, 0x1, 0x0 => 0x0, 0x1, 0x0, 
VpciDevType::Physical), // VGA controller + pci_dev!(0x0, 0x0, 0x2, 0x0 => 0x0, 0x2, 0x0, VpciDevType::Physical), // Ethernet controller + pci_dev!(0x0, 0x0, 0x3, 0x0 => 0x0, 0x3, 0x0, VpciDevType::Physical), // PCI bridge + pci_dev!(0x0, 0x0, 0x1f, 0x0 => 0x0, 0x1f, 0x0, VpciDevType::Physical), // ISA bridge + pci_dev!(0x0, 0x0, 0x1f, 0x2 => 0x0, 0x1f, 0x2, VpciDevType::Physical), // SATA controller + // pci_dev!(0x0, 0x0, 0x1f, 0x3 => 0x0, 0x1f, 0x3, VpciDevType::Physical), // SMBus + pci_dev!(0x0, 0x1, 0x0, 0x0 => 0x1, 0x0, 0x0, VpciDevType::Physical), // SCSI controller ]; #[cfg(all(feature = "graphics"))] diff --git a/platform/x86_64/qemu/configs/zone1_linux.json b/platform/x86_64/qemu/configs/zone1_linux.json index 0d5795ca..de122b12 100644 --- a/platform/x86_64/qemu/configs/zone1_linux.json +++ b/platform/x86_64/qemu/configs/zone1_linux.json @@ -117,6 +117,9 @@ "bus": "0x0", "device": "0x0", "function": "0x0", + "v_bus": "0x0", + "v_device": "0x0", + "v_function": "0x0", "dev_type": "0x0" } ] diff --git a/src/config.rs b/src/config.rs index aecc2759..9fd10ea7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -218,17 +218,23 @@ pub struct HvPciDevConfig { pub bus: u8, pub device: u8, pub function: u8, + pub v_bus: u8, + pub v_device: u8, + pub v_function: u8, pub dev_type: VpciDevType, } #[macro_export] macro_rules! 
pci_dev { - ($domain:expr, $bus:expr, $dev:expr, $func:expr, $dev_type:expr) => { + ($domain:expr, $bus:expr, $dev:expr, $func:expr => $v_bus:expr, $v_dev:expr, $v_func:expr, $dev_type:expr) => { HvPciDevConfig { domain: $domain, bus: $bus, device: $dev, function: $func, + v_bus: $v_bus, + v_device: $v_dev, + v_function: $v_func, dev_type: $dev_type, } }; @@ -257,6 +263,10 @@ pub struct HvDwcAtuConfig { // set 1 if io base use atu0, when hvisor need set mmio for io // normally, when num-viewport less than 4, io_cfg_atu_shared is 1, otherwise is 0 pub io_cfg_atu_shared: u64, + // choose the atu index for io and cfg access, when io_cfg_atu_shared is 1, io and cfg use the same atu index, otherwise use different atu index + pub io_atu_index: u64, + // Shared hardware interrupt ID for this DWC RC MSI block + pub dw_msi_irq: u64, } impl HvDwcAtuConfig { @@ -273,6 +283,8 @@ impl HvDwcAtuConfig { cfg_base: 0, cfg_size: 0, io_cfg_atu_shared: 0, + io_atu_index: 0, + dw_msi_irq: 0, } } } diff --git a/src/consts.rs b/src/consts.rs index 88a3f3e9..869ab32f 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -88,6 +88,7 @@ pub fn hv_end() -> VirtAddr { pub const IPI_EVENT_CLEAR_INJECT_IRQ: usize = 4; pub const IPI_EVENT_UPDATE_HART_LINE: usize = 5; pub const IPI_EVENT_SEND_IPI: usize = 6; +pub const IPI_EVENT_DWC_MSI_INJECT: usize = 9; /// ipi events for vcpu management pub const IPI_EVENT_VCPU_SUSPEND: usize = 7; pub const IPI_EVENT_VCPU_RESUME: usize = 8; diff --git a/src/device/irqchip/gicv3/mod.rs b/src/device/irqchip/gicv3/mod.rs index b97d81c5..e281d44d 100644 --- a/src/device/irqchip/gicv3/mod.rs +++ b/src/device/irqchip/gicv3/mod.rs @@ -45,6 +45,7 @@ use crate::hypercall::SGI_IPI_ID; use crate::zone::Zone; const ICH_HCR_UIE: u64 = 1 << 1; + //TODO: add Distributor init pub fn gicc_init() { //TODO: add Redistributor init @@ -110,6 +111,9 @@ pub fn gicv3_handle_irq_el1() { warn!("skip sgi {}", irq_id); deactivate_irq(irq_id); } else { + #[cfg(all(feature = "dwc_pcie", 
feature = "dwc_msi"))] + let mut is_dwc_msi_irq = false; + if irq_id == 27 { // virtual timer interrupt TIMER_INTERRUPT_COUNTER.fetch_add(1, core::sync::atomic::Ordering::SeqCst); @@ -128,12 +132,30 @@ pub fn gicv3_handle_irq_el1() { } else if irq_id > 31 { //inject phy irq trace!("*** get spi_irq id = {}", irq_id); + + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + { + if let Some(domain_id) = + crate::pci::dwc_msi::get_domain_id_by_irq(irq_id as u32) + { + is_dwc_msi_irq = true; + crate::pci::dwc_msi::dwc_msi_transfer_and_inject(domain_id, irq_id); + } + } } else { warn!("not konw irq id = {}", irq_id); } + + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + if irq_id != 25 && !is_dwc_msi_irq { + inject_irq(irq_id, true); + } + + #[cfg(not(all(feature = "dwc_pcie", feature = "dwc_msi")))] if irq_id != 25 { inject_irq(irq_id, true); } + deactivate_irq(irq_id); } } diff --git a/src/device/irqchip/gicv3/vgic.rs b/src/device/irqchip/gicv3/vgic.rs index 8caab289..4cf4926c 100644 --- a/src/device/irqchip/gicv3/vgic.rs +++ b/src/device/irqchip/gicv3/vgic.rs @@ -309,7 +309,19 @@ pub fn vgicv3_dist_handler(mmio: &mut MMIOAccess, _arg: usize) -> HvResult { match reg { reg if reg_range(GICD_IROUTER, 1024, 8).contains(®) => { - vgicv3_handle_irq_ops(mmio, (reg - GICD_IROUTER) as u32 / 8) + let irq = (reg - GICD_IROUTER) as u32 / 8; + + #[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] + { + // For zone0, the domainmsiinfo is empty, but it will always register the intterrupt to cpu0 + // So this remap operation is needed for other zones + if mmio.is_write && crate::pci::dwc_msi::is_dwc_msi_hwirq(irq) { + info!("remap dwc msi hwirq {} to cpu0!", irq); + mmio.value = 0; + } + } + + vgicv3_handle_irq_ops(mmio, irq) } reg if reg_range(GICD_ITARGETSR, 1024, 1).contains(®) => { vgicv3_handle_irq_ops(mmio, (reg - GICD_ITARGETSR) as u32) diff --git a/src/event.rs b/src/event.rs index cadbff2b..adf6c815 100644 --- a/src/event.rs +++ b/src/event.rs @@ -18,8 +18,8 
@@ use crate::{ arch::cpu::this_cpu_id, arch::ipi::{arch_check_events, arch_prepare_send_event, arch_send_event}, consts::{ - IPI_EVENT_CLEAR_INJECT_IRQ, IPI_EVENT_SEND_IPI, IPI_EVENT_UPDATE_HART_LINE, - IPI_EVENT_VCPU_SUSPEND, MAX_CPU_NUM, + IPI_EVENT_CLEAR_INJECT_IRQ, IPI_EVENT_DWC_MSI_INJECT, IPI_EVENT_SEND_IPI, + IPI_EVENT_UPDATE_HART_LINE, IPI_EVENT_VCPU_SUSPEND, MAX_CPU_NUM, }, cpu_data::{this_cpu_data, vcpu_suspend, CpuSet}, device::{irqchip::inject_irq, virtio_trampoline::handle_virtio_irq}, @@ -113,6 +113,18 @@ pub fn check_events() -> bool { inject_irq(IRQ_WAKEUP_VIRTIO_DEVICE, false); true } + Some(IPI_EVENT_DWC_MSI_INJECT) => { + #[cfg(all( + target_arch = "aarch64", + feature = "gicv3", + feature = "dwc_pcie", + feature = "dwc_msi" + ))] + { + crate::pci::dwc_msi::handle_dwc_msi_inject_event(); + } + true + } #[cfg(feature = "virtio_pci")] Some(IPI_EVENT_VIRTIO_PCI_CONFIG) => { inject_irq(IRQ_WAKEUP_VIRTIO_PCI_CONFIG, false); diff --git a/src/main.rs b/src/main.rs index 736d209b..639f8783 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,7 +73,7 @@ use arch::{cpu::cpu_start, entry::arch_entry}; use config::root_zone_config; use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use cpu_data::PerCpu; -#[cfg(feature = "pci")] +#[cfg(all(feature = "pci", not(feature = "pci_init_delay")))] use pci::pci_config::hvisor_pci_init; static INITED_CPUS: AtomicU32 = AtomicU32::new(0); @@ -137,7 +137,7 @@ fn primary_init_early() { let root_config = root_zone_config(); - #[cfg(feature = "pci")] + #[cfg(all(feature = "pci", not(feature = "pci_init_delay")))] if root_config.num_pci_bus > 0 { let num_pci_bus = root_config.num_pci_bus as usize; let _ = hvisor_pci_init(&root_config.pci_config[..num_pci_bus]); diff --git a/src/pci/dwc_msi.rs b/src/pci/dwc_msi.rs new file mode 100644 index 00000000..fbdc338c --- /dev/null +++ b/src/pci/dwc_msi.rs @@ -0,0 +1,342 @@ +// Copyright (c) 2025 Syswonder +// hvisor is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +// FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. +// +// Syswonder Website: +// https://www.syswonder.org +// +// Authors: +// + +use alloc::collections::btree_map::BTreeMap; +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use alloc::collections::VecDeque; +use alloc::vec::Vec; +use spin::{Lazy, Mutex}; + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use crate::cpu_data::this_cpu_data; +use crate::error::HvResult; +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use crate::event::send_event; +use crate::memory::Frame; +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +use crate::{ + consts::{IPI_EVENT_DWC_MSI_INJECT, MAX_CPU_NUM}, + device::irqchip::inject_irq, + hypercall::SGI_IPI_ID, +}; + +// DBI MSI register offsets +pub const PCIE_MSI_ADDR_LO: usize = 0x820; +pub const PCIE_MSI_ADDR_HI: usize = 0x824; +pub const PCIE_MSI_INTR0_ENABLE: usize = 0x828; +pub const PCIE_MSI_INTR0_MASK: usize = 0x82c; +pub const PCIE_MSI_INTR0_STATUS: usize = 0x830; + +/// DesignWare native MSI domain configuration +/// Each PCIe RC (domain) has a set of 32 MSI vectors that can be distributed +/// across multiple VMs. This structure manages the allocation of these vectors +/// and provides storage for the MSI doorbell address. 
+#[derive(Debug, Copy, Clone)] +pub struct DwMsiBitOwner { + /// Target vCPU used for injection when forwarding this MSI + pub target_cpu: usize, + /// First hardware MSI bit in this allocation + pub start_hwirq_bit: u32, + /// Number of contiguous vectors in this allocation + pub num_vectors: u32, +} + +impl DwMsiBitOwner { + #[inline] + pub fn contains_hwbit(&self, hwbit: u32) -> bool { + hwbit >= self.start_hwirq_bit && hwbit < self.start_hwirq_bit + self.num_vectors + } +} + +#[derive(Debug)] +pub struct DwMsiDomain { + /// Next MSI vector index to allocate (0-31) + /// When a VM allocates N vectors, it gets indices [next_alloc, next_alloc+N) + pub next_alloc: u32, + + /// Physical page frame for the doorbell address + /// Each domain has one unique doorbell that is written by hvisor + pub doorbell_frame: Frame, + + /// Shared physical interrupt line used by this DWC RC MSI block + pub irq: u32, + + /// Per-zone allocation records for fast hwbit -> zone/cpu lookup + pub bit_owners: Vec, +} + +impl DwMsiDomain { + /// Create a new DW MSI domain for a PCIe RC + pub fn new(irq: u32) -> HvResult { + let doorbell_frame = Frame::new_zero()?; + + Ok(Self { + next_alloc: 0, + doorbell_frame, + irq, + bit_owners: Vec::new(), + }) + } + + /// Allocate a contiguous range of MSI vectors for a VM + /// Returns the starting vector index if successful, or error if not enough vectors available + pub fn allocate(&mut self, num_vectors: u32) -> HvResult { + // Check if there are enough vectors left (32 total vectors per domain) + if self.next_alloc + num_vectors > 32 { + return hv_result_err!(EINVAL, "Not enough MSI vectors available in domain"); + } + + let alloc_offset = self.next_alloc; + self.next_alloc += num_vectors; + + Ok(alloc_offset) + } + + /// Allocate MSI vectors for a zone and record the ownership mapping. 
+ pub fn allocate_for_cpu(&mut self, target_cpu: usize, num_vectors: u32) -> HvResult { + let start_hwirq_bit = self.allocate(num_vectors)?; + self.bit_owners.push(DwMsiBitOwner { + target_cpu, + start_hwirq_bit, + num_vectors, + }); + info!( + "DW MSI owner added: cpu {}, vectors {}, range [{}..{}), total_owner_records {}", + target_cpu, + num_vectors, + start_hwirq_bit, + start_hwirq_bit + num_vectors, + self.bit_owners.len() + ); + Ok(start_hwirq_bit) + } + + /// Find the owner record by hardware MSI bit. + pub fn find_owner_by_hwbit(&self, hwbit: u32) -> Option { + self.bit_owners + .iter() + .copied() + .find(|owner| owner.contains_hwbit(hwbit)) + } + + /// Get the physical address of the doorbell for this domain + pub fn doorbell_paddr(&self) -> u64 { + self.doorbell_frame.start_paddr() as u64 + } +} + +/// Global storage for DW MSI domain configurations +/// Indexed by domain ID +pub static DW_MSI_DOMAINS: Lazy>> = + Lazy::new(|| Mutex::new(BTreeMap::new())); + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +static DWC_MSI_IPI_PENDING_IRQS: Lazy>>> = Lazy::new(|| { + let mut queues = Vec::with_capacity(MAX_CPU_NUM); + for _ in 0..MAX_CPU_NUM { + queues.push(VecDeque::new()); + } + Mutex::new(queues) +}); + +/// Initialize DW MSI domain for a given domain ID +pub fn init_dwc_msi_domain(domain_id: u8, irq: u32) -> HvResult<()> { + let domain = DwMsiDomain::new(irq)?; + let doorbell_paddr = domain.doorbell_paddr(); + DW_MSI_DOMAINS.lock().insert(domain_id, domain); + info!( + "Initialized DW MSI domain {} with doorbell at {:#x}, irq {}", + domain_id, doorbell_paddr, irq + ); + Ok(()) +} + +/// Get mutable reference to a DW MSI domain +pub fn get_dwc_msi_domain_mut( + domain_id: u8, +) -> Option>> { + let domains = DW_MSI_DOMAINS.lock(); + if domains.contains_key(&domain_id) { + drop(domains); + Some(DW_MSI_DOMAINS.lock()) + } else { + None + } +} + +/// Get the doorbell physical address for a specific domain +/// Returns 0 if domain not found +pub fn 
get_domain_doorbell_paddr(domain_id: u8) -> u64 { + let domains = DW_MSI_DOMAINS.lock(); + domains + .get(&domain_id) + .map(|domain| domain.doorbell_paddr()) + .unwrap_or(0) +} + +/// Get the shared hardware MSI IRQ for a specific domain +/// Returns 0 if domain not found +pub fn get_domain_msi_irq(domain_id: u8) -> u32 { + let domains = DW_MSI_DOMAINS.lock(); + domains + .get(&domain_id) + .map(|domain| domain.irq) + .unwrap_or(0) +} + +/// Check whether an IRQ is used as a shared DWC MSI hardware interrupt +pub fn is_dwc_msi_irq(irq: u32) -> bool { + let domains = DW_MSI_DOMAINS.lock(); + domains.values().any(|domain| domain.irq == irq) +} + +/// Check whether an IRQ matches any configured DWC MSI hardware interrupt. +pub fn is_dwc_msi_hwirq(irq: u32) -> bool { + is_dwc_msi_irq(irq) +} + +/// Return all domain IDs that share the specified DWC MSI hardware IRQ. +pub fn get_domains_by_irq(irq: u32) -> Vec { + let domains = DW_MSI_DOMAINS.lock(); + domains + .iter() + .filter_map(|(domain_id, domain)| { + if domain.irq == irq { + Some(*domain_id) + } else { + None + } + }) + .collect() +} + +/// Find bit ownership for a specific domain and hardware MSI bit. 
+pub fn get_domain_owner_by_hwirq_bit(domain_id: u8, hwbit: u32) -> Option { + let domains = DW_MSI_DOMAINS.lock(); + domains + .get(&domain_id) + .and_then(|domain| domain.find_owner_by_hwbit(hwbit)) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn get_domain_dbi_base(domain_id: u8) -> Option { + let ecam_base = crate::platform::ROOT_PCI_CONFIG + .iter() + .find(|cfg| cfg.domain == domain_id) + .map(|cfg| cfg.ecam_base)?; + + crate::platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|cfg| cfg.ecam_base == ecam_base) + .map(|cfg| cfg.dbi_base as usize) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn first_set_bit(mask: u32) -> Option { + if mask == 0 { + None + } else { + Some(mask.trailing_zeros()) + } +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn find_target_cpu(domain_id: u8, irq_bit: usize) -> Option { + get_domain_owner_by_hwirq_bit(domain_id, irq_bit as u32).map(|owner| owner.target_cpu) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn dwc_msi_pending_irq_bit(domain_id: u8) -> Option { + let dbi_base = get_domain_dbi_base(domain_id)?; + let status = + unsafe { core::ptr::read_volatile((dbi_base + PCIE_MSI_INTR0_STATUS) as *const u32) }; + first_set_bit(status) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +pub fn get_domain_id_by_irq(irq: u32) -> Option { + let domains = DW_MSI_DOMAINS.lock(); + domains.iter().find_map(|(domain_id, domain)| { + if domain.irq == irq { + Some(*domain_id) + } else { + None + } + }) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +fn enqueue_dwc_msi_ipi_irq(target_cpu: usize, irq_id: usize) -> bool { + if target_cpu >= MAX_CPU_NUM { + error!( + "DWC MSI enqueue failed: invalid target cpu {}, irq {}", + target_cpu, irq_id + ); + return false; + } + + let mut queues = DWC_MSI_IPI_PENDING_IRQS.lock(); + if let Some(queue) = queues.get_mut(target_cpu) { + queue.push_back(irq_id); + true + } else { + false + } +} + +#[cfg(all(feature = 
"dwc_pcie", feature = "dwc_msi"))] +fn pop_dwc_msi_ipi_irq(cpu_id: usize) -> Option { + if cpu_id >= MAX_CPU_NUM { + return None; + } + + let mut queues = DWC_MSI_IPI_PENDING_IRQS.lock(); + queues.get_mut(cpu_id).and_then(|queue| queue.pop_front()) +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +pub fn dwc_msi_transfer_and_inject(domain_id: u8, irq_id: usize) { + if let Some(irq_bit) = dwc_msi_pending_irq_bit(domain_id) { + if let Some(target_cpu) = find_target_cpu(domain_id, irq_bit as usize) { + if target_cpu == 0 { + inject_irq(irq_id, true); + } else { + if enqueue_dwc_msi_ipi_irq(target_cpu, irq_id) { + send_event(target_cpu, SGI_IPI_ID as usize, IPI_EVENT_DWC_MSI_INJECT); + } else { + error!( + "Failed to enqueue DWC MSI irq {} for target cpu {}", + irq_id, target_cpu + ); + } + } + } else { + error!("No target cpu found for DWC msi irq bit {}!", irq_bit); + } + } else { + error!("No pending DWC msi irq found!"); + } +} + +#[cfg(all(feature = "dwc_pcie", feature = "dwc_msi"))] +pub fn handle_dwc_msi_inject_event() { + let cpu_id = this_cpu_data().id; + if let Some(irq_id) = pop_dwc_msi_ipi_irq(cpu_id) { + inject_irq(irq_id, true); + } else { + warn!("No pending DWC MSI IPI irq for cpu {}", cpu_id); + } +} diff --git a/src/pci/mod.rs b/src/pci/mod.rs index 74030417..dcf8899e 100644 --- a/src/pci/mod.rs +++ b/src/pci/mod.rs @@ -24,6 +24,9 @@ pub mod pci_handler; pub mod pci_struct; pub mod vpci_dev; +#[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] +pub mod dwc_msi; + #[cfg(test)] pub mod pci_test; diff --git a/src/pci/pci_access.rs b/src/pci/pci_access.rs index b13a36d9..f2c6c77f 100644 --- a/src/pci/pci_access.rs +++ b/src/pci/pci_access.rs @@ -392,7 +392,7 @@ impl PciMem { warn!("unkown bar type: {:#?}", self.bar_type); } } - info!("self.virtual_value = {}", val); + // info!("self.virtual_value = {}", val); self.virtual_value = val; } @@ -1067,7 +1067,7 @@ pub enum BridgeField { LatencyTime, HeaderType, Bist, - Bar, + Bar(usize), 
PrimaryBusNumber, SecondaryBusNumber, SubordinateBusNumber, @@ -1103,7 +1103,7 @@ impl Debug for BridgeField { BridgeField::LatencyTime => write!(f, "LatencyTime"), BridgeField::HeaderType => write!(f, "HeaderType"), BridgeField::Bist => write!(f, "Bist"), - BridgeField::Bar => write!(f, "Bar"), + BridgeField::Bar(slot) => write!(f, "Bar({})", slot), BridgeField::PrimaryBusNumber => write!(f, "PrimaryBusNumber"), BridgeField::SecondaryBusNumber => write!(f, "SecondaryBusNumber"), BridgeField::SubordinateBusNumber => write!(f, "SubordinateBusNumber"), @@ -1141,7 +1141,7 @@ impl PciField for BridgeField { BridgeField::LatencyTime => 0x0d, BridgeField::HeaderType => 0x0e, BridgeField::Bist => 0x0f, - BridgeField::Bar => 0x10, + BridgeField::Bar(slot) => 0x10 + slot * 4, BridgeField::PrimaryBusNumber => 0x18, BridgeField::SecondaryBusNumber => 0x19, BridgeField::SubordinateBusNumber => 0x1a, @@ -1176,7 +1176,7 @@ impl PciField for BridgeField { BridgeField::LatencyTime => 1, BridgeField::HeaderType => 1, BridgeField::Bist => 1, - BridgeField::Bar => 4, + BridgeField::Bar(_) => 4, BridgeField::PrimaryBusNumber => 1, BridgeField::SecondaryBusNumber => 1, BridgeField::SubordinateBusNumber => 1, @@ -1213,7 +1213,8 @@ impl BridgeField { (0x0d, 1) => BridgeField::LatencyTime, (0x0e, 1) => BridgeField::HeaderType, (0x0f, 1) => BridgeField::Bist, - (0x10, 4) | (0x14, 4) => BridgeField::Bar, + (0x10, 4) => BridgeField::Bar(0), + (0x14, 4) => BridgeField::Bar(1), (0x18, 1) => BridgeField::PrimaryBusNumber, (0x19, 1) => BridgeField::SecondaryBusNumber, (0x1a, 1) => BridgeField::SubordinateBusNumber, diff --git a/src/pci/pci_config.rs b/src/pci/pci_config.rs index f2235dc8..b280bbb0 100644 --- a/src/pci/pci_config.rs +++ b/src/pci/pci_config.rs @@ -77,6 +77,11 @@ pub static GLOBAL_PCIE_LIST: Lazy HvResult { warn!("begin {:#x?}", pci_config); + + // Track domains that have been initialized for DW MSI + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + let mut 
initialized_domains: alloc::vec::Vec = alloc::vec::Vec::new(); + #[cfg(any( feature = "ecam_pcie", feature = "dwc_pcie", @@ -162,10 +167,30 @@ pub fn hvisor_pci_init(pci_config: &[HvPciConfig]) -> HvResult { let e = rootcomplex.enumerate(Some(range), domain, allocator_opt); info!("begin enumerate {:#x?}", e); for node in e { - info!("node {:#?}", node); - GLOBAL_PCIE_LIST - .lock() - .insert(node.get_bdf(), ArcRwLockVirtualPciConfigSpace::new(node)); + // info!("node {:#?}", node); + let sriov_vfs = rootcomplex.create_sriov_vfs(&node); + + { + let mut global_pcie_list = GLOBAL_PCIE_LIST.lock(); + global_pcie_list.insert(node.get_bdf(), ArcRwLockVirtualPciConfigSpace::new(node)); + for vf in sriov_vfs { + global_pcie_list.insert(vf.get_bdf(), ArcRwLockVirtualPciConfigSpace::new(vf)); + } + } + } + + // Initialize DW MSI domain for this domain ID (only once per domain) + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if !initialized_domains.contains(&domain) { + let msi_irq = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|cfg| cfg.ecam_base == rootcomplex_config.ecam_base) + .map(|cfg| cfg.dw_msi_irq as u32) + .unwrap_or(0); + crate::pci::dwc_msi::init_dwc_msi_domain(domain, msi_irq)?; + initialized_domains.push(domain); + } } } info!("hvisor pci init done \n{:#?}", GLOBAL_PCIE_LIST); @@ -182,7 +207,7 @@ impl Zone { _num_pci_config: usize, ) -> HvResult { let mut inner = self.write(); - let mut guard = GLOBAL_PCIE_LIST.lock(); + let guard = GLOBAL_PCIE_LIST.lock(); for target_pci_config in pci_config { // Skip empty config if target_pci_config.ecam_base == 0 { @@ -254,9 +279,8 @@ impl Zone { .then_with(|| a.function.cmp(&b.function)) }); - let mut vbus_pre = bus_range_begin; - let mut bus_pre = bus_range_begin; - let mut device_pre = 0u8; + let mut domain_msi_count: u32 = 0; + let mut vdevice_pre = 0u8; let msix_backend = get_arch_msix_backend(); if let Some(x) = msix_backend.clone() { @@ -280,49 +304,12 @@ impl Zone { */ for dev_config in 
&filtered_devices { let bdf = Bdf::new_from_config(*dev_config); - // let bus = bdf.bus(); - // let device = bdf.device(); - // let function = bdf.function(); - - // /* - // * vfunction = if (bus != bus_pre || device != device_pre) && function != 0 - // * In practice, remapping is performed only for new devices whose function is not 0; - // * however, the check for function != 0 does not affect the final result. - // */ - // let vfunction = if bus != bus_pre || device != device_pre { - // 0 - // } else { - // function - // }; - - // let vbus = if bus > bus_pre { - // vbus_pre += 1; - // vbus_pre - // } else { - // vbus_pre - // }; - - // // Remap device number to be contiguous, starting from 0 - // let vdevice = if bus != bus_pre || device != device_pre { - // // New bus or new device, increment device counter - // if bus != bus_pre { - // vdevice_pre = 0; - // } else { - // vdevice_pre += 1; - // } - // vdevice_pre - // } else { - // // Same bus and device, keep the same virtual device number - // vdevice_pre - // }; - - // let vbdf = Bdf::new(bdf.domain(), vbus, vdevice, vfunction); - - // device_pre = device; - // bus_pre = bus; - - // TODO: adjust vbdf will cause line interrupt injecet error, so remove it temporarily - let vbdf = bdf; + let vbdf = Bdf::new( + bdf.domain(), + dev_config.v_bus, + dev_config.v_device, + dev_config.v_function, + ); info!("set bdf {:#?} to vbdf {:#?}", bdf, vbdf); @@ -359,17 +346,33 @@ impl Zone { { let mut vdev = dev.read().config_space.clone(); vdev.set_vbdf(vbdf); + let msi_count = vdev.get_msi_count(); + domain_msi_count += msi_count; inner.vpci_bus_mut().insert(vbdf, vdev); } else { - // Check if device is already allocated to another zone - if dev.get_zone_id().is_none() { - dev.set_zone_id(Some(_zone_id as u32)); - let mut vdev_inner = dev.read().config_space.clone(); - vdev_inner.set_vbdf(vbdf); - inner.vpci_bus_mut().insert(vbdf, vdev_inner); + // Allow allocation if zone_id is None (unassigned), or if zone_id is + // 
Some(0) and the device is a SRIOV VF (initially assigned to root zone + // during enumeration, can be reassigned to a guest zone). + let is_sriov_vf_from_root = dev.get_zone_id() == Some(0) + && dev.read().get_sriov_vf_info().is_some(); + let is_pf = dev.read().get_sriov_info().is_some(); + if dev.get_zone_id().is_none() || is_sriov_vf_from_root { + if is_pf && _zone_id != 0 { + warn!( + "The SR-IOV PF {:#x?} can only be assigned to the root VM", + bdf + ); + } else { + dev.set_zone_id(Some(_zone_id as u32)); + let mut vdev_inner = dev.read().config_space.clone(); + vdev_inner.set_vbdf(vbdf); + let msi_count = vdev_inner.get_msi_count(); + domain_msi_count += msi_count; + inner.vpci_bus_mut().insert(vbdf, vdev_inner); + } } else { warn!( - "Device {:#?} is already allocated to zone {:?}", + "Device {:#x?} is already allocated to zone {:?}", bdf, dev.get_zone_id() ); @@ -404,6 +407,48 @@ impl Zone { } } } + + // After processing all devices for this domain, allocate hardware MSI bits + if domain_msi_count > 0 { + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + // Get the DW MSI domain allocator and allocate hwbit + if let Some(mut domain_lock) = + crate::pci::dwc_msi::get_dwc_msi_domain_mut(target_domain) + { + if let Some(domain_msi) = domain_lock.get_mut(&target_domain) { + let zone_cpu_set = inner.cpu_set(); + let target_cpu = zone_cpu_set.first_cpu().unwrap_or(0); + match domain_msi.allocate_for_cpu(target_cpu, domain_msi_count) { + Ok(hwirq_bit) => { + // Register the MSI info for this domain + inner.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + hwirq_bit, + ); + } + Err(e) => { + warn!( + "Failed to allocate MSI for domain {}: {:?}", + target_domain, e + ); + } + } + } + } + } + + #[cfg(not(feature = "dwc_msi"))] + { + // Without dwc_msi feature, just register without hardware bit allocation + inner.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + 0, // hwirq_bit is 0 when not using 
dwc_msi + ); + } + } } info!("vpci bus init done\n {:#x?}", inner.vpci_bus()); Ok(()) @@ -437,11 +482,16 @@ impl Zone { } #[cfg(feature = "dwc_pcie")] { + // Encode domain_id into the arg parameter: arg = ecam_base + domain_id + // Since ecam_base is 4KB aligned, its low 12 bits are 0 + // domain_id (0-15) fits in the low bits without interfering + let encoded_arg = + rootcomplex_config.ecam_base as usize + (rootcomplex_config.domain as usize); inner.mmio_region_register( rootcomplex_config.ecam_base as usize, rootcomplex_config.ecam_size as usize, mmio_vpci_handler_dbi, - rootcomplex_config.ecam_base as usize, + encoded_arg, ); let extend_config = platform::ROOT_DWC_ATU_CONFIG @@ -505,6 +555,10 @@ impl Zone { extend_config.cfg_base as PciConfigAddress, rootcomplex_config.ecam_base as usize, ); + inner.atu_configs_mut().insert_cfg_base_mapping( + cfg1_base as PciConfigAddress, + rootcomplex_config.ecam_base as usize, + ); inner.atu_configs_mut().insert_io_base_mapping( rootcomplex_config.io_base as PciConfigAddress, rootcomplex_config.ecam_base as usize, diff --git a/src/pci/pci_handler.rs b/src/pci/pci_handler.rs index e118a966..ae1d6adc 100644 --- a/src/pci/pci_handler.rs +++ b/src/pci/pci_handler.rs @@ -14,12 +14,22 @@ // Authors: // +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +use alloc::collections::btree_map::BTreeMap; use alloc::string::String; +use alloc::vec::Vec; +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +use spin::Lazy; +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +use spin::Mutex; use crate::cpu_data::this_zone; use crate::error::HvResult; -use crate::memory::MMIOAccess; +use crate::memory::{mmio_perform_access, MMIOAccess}; use crate::memory::{GuestPhysAddr, HostPhysAddr, MemFlags, MemoryRegion}; +use crate::pci::pci_struct::{ + CapabilityType, SRIOV_CAP_SIZE, SRIOV_VF_BAR_END, SRIOV_VF_BAR_OFFSET, +}; use crate::zone::is_this_root_zone; use super::pci_access::{BridgeField, EndpointField, 
HeaderType, PciField, PciMemType}; @@ -32,20 +42,32 @@ use super::PciConfigAddress; use crate::zone::this_zone_id; #[cfg(feature = "dwc_pcie")] -use crate::{ - memory::mmio_perform_access, - pci::config_accessors::{ - dwc::DwcConfigRegionBackend, - dwc_atu::{ - AtuType, AtuUnroll, ATU_BASE, ATU_ENABLE_BIT, ATU_REGION_SIZE, PCIE_ATU_UNR_LIMIT, - PCIE_ATU_UNR_LOWER_BASE, PCIE_ATU_UNR_LOWER_TARGET, PCIE_ATU_UNR_REGION_CTRL1, - PCIE_ATU_UNR_REGION_CTRL2, PCIE_ATU_UNR_UPPER_BASE, PCIE_ATU_UNR_UPPER_LIMIT, - PCIE_ATU_UNR_UPPER_TARGET, - }, - PciRegionMmio, +use crate::pci::config_accessors::{ + dwc::DwcConfigRegionBackend, + dwc_atu::{ + AtuType, AtuUnroll, ATU_BASE, ATU_ENABLE_BIT, ATU_REGION_SIZE, PCIE_ATU_UNR_LIMIT, + PCIE_ATU_UNR_LOWER_BASE, PCIE_ATU_UNR_LOWER_TARGET, PCIE_ATU_UNR_REGION_CTRL1, + PCIE_ATU_UNR_REGION_CTRL2, PCIE_ATU_UNR_UPPER_BASE, PCIE_ATU_UNR_UPPER_LIMIT, + PCIE_ATU_UNR_UPPER_TARGET, }, + PciRegionMmio, +}; + +#[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] +use super::dwc_msi::{ + PCIE_MSI_ADDR_HI, PCIE_MSI_ADDR_LO, PCIE_MSI_INTR0_ENABLE, PCIE_MSI_INTR0_MASK, + PCIE_MSI_INTR0_STATUS, }; +#[cfg(not(feature = "dwc_msi"))] +const PCIE_MSI_ADDR_LO: usize = 0x820; +#[cfg(not(feature = "dwc_msi"))] +const PCIE_MSI_INTR0_STATUS: usize = 0x830; + +const SRIOV_CTRL_OFFSET: PciConfigAddress = 0x08; +const SRIOV_NUM_VFS_OFFSET: PciConfigAddress = 0x10; +const SRIOV_CTRL_VF_ENABLE: u16 = 1 << 0; + macro_rules! 
pci_log { ($($arg:tt)*) => { // info!($($arg)*); @@ -215,6 +237,342 @@ fn handle_virtio_pci_write( // } // } +fn collect_vf_device_copies( + vf_host_bdf: super::pci_struct::Bdf, +) -> Vec { + let mut devices = Vec::new(); + + { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + for dev in vbus.devs_ref().values() { + if dev.get_bdf() == vf_host_bdf { + devices.push(dev.clone()); + } + } + } + + if let Some(dev) = GLOBAL_PCIE_LIST.lock().get(&vf_host_bdf).cloned() { + devices.push(dev); + } + + devices +} + +fn sync_sriov_vf_bar_state( + pf_dev: ArcRwLockVirtualPciConfigSpace, + offset: PciConfigAddress, + size: usize, + value: usize, +) -> HvResult { + if size != 4 { + return Ok(false); + } + + let Some((cap_offset, vf_bdfs)) = + pf_dev.with_sriov_info(|sriov_info| (sriov_info.cap_offset, sriov_info.vf_bdfs.clone())) + else { + return Ok(false); + }; + + let vf_bar_start = cap_offset + SRIOV_VF_BAR_OFFSET; + if offset < vf_bar_start || offset >= cap_offset + SRIOV_VF_BAR_END { + return Ok(false); + } + + let relative = offset - vf_bar_start; + if (relative & 0x3) != 0 { + return Ok(false); + } + let slot = (relative / 4) as usize; + + let Some((bar_type, bar_size)) = pf_dev.with_sriov_info(|sriov_info| { + ( + sriov_info.vf_bars[slot].get_type(), + sriov_info.vf_bars[slot].get_size(), + ) + }) else { + return Ok(false); + }; + + if (value & 0xfffffff0) == 0xfffffff0 { + // PF-side SR-IOV BAR probing only queries the template in the ext cap. + // VF BAR state is meaningful only after PF programs a valid BAR value. + return Ok(true); + } + + let pf_value = match bar_type { + PciMemType::Mem64Low => { + let low = pf_dev.read_hw(offset, size)? as u64; + let high = pf_dev.read_hw(offset + 4, size)? as u64; + (low | (high << 32)) & !0xf + } + PciMemType::Mem64High => { + let low = pf_dev.read_hw(offset - 4, size)? as u64; + let high = pf_dev.read_hw(offset, size)? 
as u64; + (low | (high << 32)) & !0xf + } + PciMemType::Io => (pf_dev.read_hw(offset, size)? as u64) & !0x3, + PciMemType::Mem32 => (pf_dev.read_hw(offset, size)? as u64) & !0xf, + _ => return Ok(false), + }; + + for (vf_index, vf_bdf) in vf_bdfs.into_iter().enumerate() { + let vf_value = pf_value.saturating_add((vf_index as u64).saturating_mul(bar_size)); + let propagated_value = match bar_type { + PciMemType::Mem64Low => vf_value as u32 as usize, + PciMemType::Mem64High => (vf_value >> 32) as u32 as usize, + PciMemType::Io | PciMemType::Mem32 => vf_value as u32 as usize, + _ => continue, + }; + + for vf_dev in collect_vf_device_copies(vf_bdf) { + let is_root = is_this_root_zone(); + let is_dev_belong_to_zone = { + let base = vf_dev.read().get_base(); + let zone = this_zone(); + let mut guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + vbus.get_device_by_base(base).is_some() + }; + + // let vf_id = vf_dev + // .read() + // .get_sriov_vf_info() + // .map(|vf_info| vf_info.vf_index) + // .unwrap_or(vf_index as u16); + + let _ = handle_endpoint_access( + vf_dev, + EndpointField::Bar(slot), + propagated_value, + true, + true, + is_root, + is_dev_belong_to_zone, + )?; + } + } + + Ok(true) +} + +fn handle_cap_access( + dev: ArcRwLockVirtualPciConfigSpace, + offset: PciConfigAddress, + size: usize, + value: usize, + is_write: bool, + is_dev_belong_to_zone: bool, +) -> HvResult> { + // Handle capability region access (offset >= 0x34) + if offset == 0x34 { + // Cap Pointer register (may be accessed as different sizes) + if is_dev_belong_to_zone { + // Direct pass through to hardware + if is_write { + dev.write_hw(offset, size, value)?; + Ok(None) + } else { + Ok(Some(dev.read_hw(offset, size)?)) + } + } else { + // Device not belong to zone, return 0 (no capability) + if is_write { + Ok(None) + } else { + Ok(Some(0)) + } + } + } else if offset >= 0x100 { + #[cfg(feature = "sriov")] + if let Some(cap_offset) = dev.with_sriov_info(|sriov_info| sriov_info.cap_offset) 
{ + if offset >= cap_offset && offset < cap_offset + SRIOV_CAP_SIZE { + if is_write { + dev.write_hw(offset, size, value)?; + + let _ = sync_sriov_vf_bar_state(dev.clone(), offset, size, value)?; + return Ok(None); + } + let read_value = dev.read_hw(offset, size)?; + return Ok(Some(read_value)); + } + } + + // When `sriov` feature is disabled, hide the SR-IOV extended capability + // from guest VMs by patching the ext-cap linked list on the fly. + #[cfg(not(feature = "sriov"))] + if let Some(hide) = dev.with_hide_sriov(|h| h.clone()) { + use bit_field::BitField; + // Accesses inside the SR-IOV cap range: return 0 / silently drop writes. + if offset >= hide.sriov_cap_offset && offset < hide.sriov_cap_offset + SRIOV_CAP_SIZE { + if is_write { + return Ok(None); + } else { + return Ok(Some(0)); + } + } + + // Access to the first DWORD of the preceding cap node: patch the + // `next` pointer so it skips over the SR-IOV cap. + if let Some(prev_offset) = hide.prev_cap_offset { + if offset >= prev_offset && offset < prev_offset + 4 { + if is_write { + // Pass writes through unchanged; the physical `next` + // pointer still points to SR-IOV which is fine for host. + dev.write_hw(offset, size, value)?; + return Ok(None); + } else { + // Always read the full DWORD, patch bits[31:20], then + // return the sub-slice the guest asked for. + let mut dw = dev.read_hw(prev_offset, 4)? as u32; + dw.set_bits(20..32, hide.sriov_cap_next as u32); + let byte_offset = (offset - prev_offset) as usize; + let result = match size { + 1 => ((dw >> (byte_offset * 8)) & 0xFF) as usize, + 2 => ((dw >> (byte_offset * 8)) & 0xFFFF) as usize, + _ => dw as usize, + }; + return Ok(Some(result)); + } + } + } + // If SR-IOV is the first ext cap (prev_cap_offset == None) and the + // guest reads offset 0x100, the cap header has already been zeroed + // above so the guest sees no extended capabilities at all. 
+ } + + if is_write { + dev.write_hw(offset, size, value)?; + Ok(None) + } else { + Ok(Some(dev.read_hw(offset, size)?)) + } + } else { + // Other capability region offsets + // Try to find the capability that contains this offset + let cap_info = dev.with_cap(|capabilities| { + capabilities + .cap_in_config_ref() + .range(..=offset as u64) + .next_back() + .map(|(cap_offset, cap)| (*cap_offset, cap.get_type())) + }); + + if let Some((cap_offset, cap_type)) = cap_info { + let cap_offset = cap_offset as usize; + let relative_offset = offset as usize - cap_offset; + + if cap_type == CapabilityType::Msi { + let vbdf = dev.get_vbdf(); + let _domain_id = vbdf.domain(); + + let is_msi_64 = dev.with_cap(|capabilities| { + capabilities + .cap_in_config_ref() + .get(&(cap_offset as u64)) + .and_then(|cap| cap.with_region(|region| region.read(0x02, 2).ok())) + .map(|ctrl| (ctrl & (1 << 7)) != 0) + .unwrap_or(false) + }); + + let _is_addr_low = matches!(relative_offset, 4 | 5 | 6 | 7); + let _is_addr_high = is_msi_64 && matches!(relative_offset, 8 | 9 | 10 | 11); + let _is_msg_data = if is_msi_64 { + matches!(relative_offset, 12 | 13) + } else { + matches!(relative_offset, 8 | 9) + }; + + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if is_write { + if _is_addr_low { + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff00000000; + msi_info.set_doorbell(current | (value as u64)); + }); + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + dev.write_hw(offset, size, (hw_paddr & 0xffffffff) as usize)?; + return Ok(None); + } + if _is_addr_high { + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff; + msi_info.set_doorbell(current | ((value as u64) << 32)); + }); + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + dev.write_hw(offset, size, ((hw_paddr >> 32) & 0xffffffff) as usize)?; + return Ok(None); + } + if _is_msg_data { + let zone = 
this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = vbus.domain_msi_info().get(&_domain_id) { + let hw_value = + (value as u32).wrapping_add(domain_msi_info.hwirq_bit); + dev.write_hw(offset, size, hw_value as usize)?; + } else { + dev.write_hw(offset, size, value)?; + } + return Ok(None); + } + } else { + if _is_addr_low { + let vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi_info| msi_info.msi_doorbell) + .unwrap_or(0); + return Ok(Some((vm_doorbell & 0xffffffff) as usize)); + } + if _is_addr_high { + let vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi_info| msi_info.msi_doorbell) + .unwrap_or(0); + return Ok(Some(((vm_doorbell >> 32) & 0xffffffff) as usize)); + } + if _is_msg_data { + let hw_value = dev.read_hw(offset, size)?; + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = vbus.domain_msi_info().get(&_domain_id) { + let hwirq_bit = domain_msi_info.hwirq_bit; + let hw_vec = hw_value as u32; + let virq_bit = if hw_vec >= hwirq_bit { + hw_vec - hwirq_bit + } else { + hw_vec + }; + return Ok(Some(virq_bit as usize)); + } + return Ok(Some(hw_value)); + } + } + } + } + + // Direct pass through to hardware for all cap access + if is_write { + dev.write_hw(offset, size, value)?; + Ok(None) + } else { + Ok(Some(dev.read_hw(offset, size)?)) + } + } else { + // No capability found at this offset + Ok(None) + } + } +} + fn handle_endpoint_access( dev: ArcRwLockVirtualPciConfigSpace, field: EndpointField, @@ -277,6 +635,30 @@ fn handle_endpoint_access( * as previously described */ let bar_type = dev.with_bar_ref(slot, |bar| bar.get_type()); + + // Check if this BAR contains MSIX table (only when dwc_msi feature is enabled) + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + let is_msix_bar = { + let msix_check_slot = if bar_type == PciMemType::Mem64High && slot > 0 { + slot - 1 + } else { + slot + }; + + dev.read() + 
.get_msi_info() + .and_then(|msi_info| { + msi_info + .msix_info + .as_ref() + .map(|msix| msix.bar_id == msix_check_slot as u8) + }) + .unwrap_or(false) + }; + + #[cfg(not(feature = "dwc_msi"))] + let is_msix_bar = false; + if bar_type != PciMemType::default() { if is_write { if is_direct && is_root { @@ -291,22 +673,34 @@ fn handle_endpoint_access( value, )?; if (bar_type == PciMemType::Mem32) + | (bar_type == PciMemType::Mem64Low) | (bar_type == PciMemType::Mem64High) | (bar_type == PciMemType::Io) { + let old_vaddr = + dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; let new_vaddr = { - if bar_type == PciMemType::Mem64High { - /* last 4bit is flag, not address and need ignore - * flag will auto add when set_value and set_virtual_value - * Read from config_value.bar_value cache instead of space - */ - let low_value = dev - .with_config_value(|cv| cv.get_bar_value(slot - 1)) - as u64; - let high_value = (value as u32 as u64) << 32; - (low_value | high_value) & !0xf - } else { - (value as u64) & !0xf + match bar_type { + PciMemType::Mem64Low => { + let low_value = value as u32 as u64; + let high_value = (dev + .with_config_value(|cv| cv.get_bar_value(slot + 1)) + as u64) + << 32; + (low_value | high_value) & !0xf + } + PciMemType::Mem64High => { + /* last 4bit is flag, not address and need ignore + * flag will auto add when set_value and set_virtual_value + * Read from config_value.bar_value cache instead of space + */ + let low_value = dev + .with_config_value(|cv| cv.get_bar_value(slot - 1)) + as u64; + let high_value = (value as u32 as u64) << 32; + (low_value | high_value) & !0xf + } + _ => (value as u64) & !0xf, } }; @@ -316,12 +710,102 @@ fn handle_endpoint_access( dev.with_bar_ref_mut(slot - 1, |bar| { bar.set_virtual_value(new_vaddr) }); + } else if bar_type == PciMemType::Mem64Low { + dev.with_bar_ref_mut(slot + 1, |bar| { + bar.set_virtual_value(new_vaddr) + }); } // set value dev.with_bar_ref_mut(slot, |bar| bar.set_value(new_vaddr)); if 
bar_type == PciMemType::Mem64High { dev.with_bar_ref_mut(slot - 1, |bar| bar.set_value(new_vaddr)); + } else if bar_type == PciMemType::Mem64Low { + dev.with_bar_ref_mut(slot + 1, |bar| bar.set_value(new_vaddr)); + } + + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } + + let bar_size = { + let size = dev.with_bar_ref(slot, |bar| bar.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + + if is_msix_bar { + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + guard.mmio_region_register( + new_vaddr_aligned as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + {} + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + } + drop(guard); + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device 
<< 3) + vbdf.function, + ); } } } @@ -332,24 +816,34 @@ fn handle_endpoint_access( }); if (value & 0xfffffff0) != 0xfffffff0 { if (bar_type == PciMemType::Mem32) + | (bar_type == PciMemType::Mem64Low) | (bar_type == PciMemType::Mem64High) | (bar_type == PciMemType::Io) { let old_vaddr = dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; let new_vaddr = { - if bar_type == PciMemType::Mem64High { - /* last 4bit is flag, not address and need ignore - * flag will auto add when set_value and set_virtual_value - * Read from config_value.bar_value cache instead of space - */ - let low_value = dev - .with_config_value(|cv| cv.get_bar_value(slot - 1)) - as u64; - let high_value = (value as u32 as u64) << 32; - (low_value | high_value) & !0xf - } else { - (value as u64) & !0xf + match bar_type { + PciMemType::Mem64Low => { + let low_value = value as u32 as u64; + let high_value = (dev + .with_config_value(|cv| cv.get_bar_value(slot + 1)) + as u64) + << 32; + (low_value | high_value) & !0xf + } + PciMemType::Mem64High => { + /* last 4bit is flag, not address and need ignore + * flag will auto add when set_value and set_virtual_value + * Read from config_value.bar_value cache instead of space + */ + let low_value = dev + .with_config_value(|cv| cv.get_bar_value(slot - 1)) + as u64; + let high_value = (value as u32 as u64) << 32; + (low_value | high_value) & !0xf + } + _ => (value as u64) & !0xf, } }; @@ -360,10 +854,36 @@ fn handle_endpoint_access( dev.with_bar_ref_mut(slot - 1, |bar| { bar.set_virtual_value(new_vaddr) }); + } else if bar_type == PciMemType::Mem64Low { + dev.with_bar_ref_mut(slot + 1, |bar| { + bar.set_virtual_value(new_vaddr) + }); } - let paddr = - dev.with_bar_ref(slot, |bar| bar.get_value64()) as HostPhysAddr; + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + dev.with_msi_info_mut(|msi_info| { + if 
let Some(msix) = msi_info.msix_info.as_mut() { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } let bar_size = { let size = dev.with_bar_ref(slot, |bar| bar.get_size()); if crate::memory::addr::is_aligned(size as usize) { @@ -381,20 +901,37 @@ fn handle_endpoint_access( let zone = this_zone(); let mut guard = zone.write(); - let gpm = guard.gpm_mut(); - if !gpm - .try_delete(old_vaddr.try_into().unwrap(), bar_size as usize) - .is_ok() - { - // warn!("delete bar {}: can not found 0x{:x}", slot, old_vaddr); + if is_msix_bar { + // Remove old MSIX handler if it exists + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + // Register new MSIX handler at new address + guard.mmio_region_register( + new_vaddr as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + // Delete old gpm mapping if it exists + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + { + // warn!("delete bar {}: can not found 0x{:x}", slot, old_vaddr); + } + // Insert new gpm mapping at new address + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; } - gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( - new_vaddr as GuestPhysAddr, - paddr as HostPhysAddr, - bar_size as _, - MemFlags::READ | MemFlags::WRITE, - ))?; drop(guard); /* after update gpm, mem barrier is needed */ @@ -480,99 +1017,589 @@ fn handle_endpoint_access( configvalue.set_rom_value(value as u32); }); - if value & 0xfffff800 != 0xfffff800 { + // Check if this is size probe (all 1s in BA field, bits 31-11) + let is_size_probe = (value & 0xfffff800) == 0xfffff800; + // Check if ROM enable bit (bit 0) is set + let rom_enabled = (value & 0x1) != 0; + + if 
!is_size_probe { let old_vaddr = dev.with_rom_ref(|rom| rom.get_virtual_value64()) & !0xf; let new_vaddr = (value as u64) & !0xf; - dev.with_rom_ref_mut(|rom| rom.set_virtual_value(new_vaddr)); + // Only perform mapping operations if ROM enable bit is set + if rom_enabled { + // set new_value not new_vaddr, because `set_virtual_value` will not add enable flag automatically + dev.with_rom_ref_mut(|rom| rom.set_virtual_value(value as _)); + + // Write to hardware with enable bit set + // Get the current ROM value from hardware and set bit 0 + // And not to use rom.set_value() + let hw_value = dev.with_rom_ref(|rom| rom.get_value64()); + let hw_value_enabled = hw_value | 0x1; // Set enable bit + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + hw_value_enabled as usize, + )?; + dev.with_rom_ref_mut(|rom| rom.set_value(hw_value_enabled)); - let paddr = if is_root { - dev.with_rom_ref_mut(|rom| rom.set_value(new_vaddr)); - new_vaddr as HostPhysAddr - } else { - dev.with_rom_ref(|rom| rom.get_value64()) as HostPhysAddr - }; + let paddr = + dev.with_rom_ref(|rom| rom.get_value64()) as HostPhysAddr; - let rom_size = { - let size = dev.with_rom_ref(|rom| rom.get_size()); - if crate::memory::addr::is_aligned(size as usize) { - size - } else { - crate::memory::PAGE_SIZE as u64 - } - }; - let new_vaddr = if !crate::memory::addr::is_aligned(new_vaddr as usize) - { - crate::memory::addr::align_up(new_vaddr as usize) as u64 - } else { - new_vaddr as u64 - }; + let rom_size = { + let size = dev.with_rom_ref(|rom| rom.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; - let zone = this_zone(); - let mut guard = zone.write(); - let gpm = guard.gpm_mut(); + let zone = this_zone(); + let mut guard = 
zone.write(); + let gpm = guard.gpm_mut(); - if !gpm - .try_delete(old_vaddr.try_into().unwrap(), rom_size as usize) - .is_ok() - { - // warn!("delete rom bar: can not found 0x{:x}", old_vaddr); - } - gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( - new_vaddr as GuestPhysAddr, - paddr as HostPhysAddr, - rom_size as _, - MemFlags::READ | MemFlags::WRITE, - ))?; - drop(guard); - /* after update gpm, mem barrier is needed - */ - #[cfg(target_arch = "aarch64")] - unsafe { - core::arch::asm!("isb"); - core::arch::asm!("tlbi vmalls12e1is"); - core::arch::asm!("dsb nsh"); - } - /* after update gpm, need to flush iommu table - * in x86_64 - */ - #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] - { - let vbdf = dev.get_vbdf(); - crate::device::iommu::flush( - this_zone_id(), - vbdf.bus, - (vbdf.device << 3) + vbdf.function, - ); - } - #[cfg(target_arch = "riscv64")] - unsafe { - // TOOD: add remote fence support (using sbi rfence spec?) - core::arch::asm!("hfence.gvma"); - } - } - } - Ok(None) - } else { - // read rom bar - if (dev.with_config_value(|configvalue| configvalue.get_rom_value())) - & 0xfffff800 - == 0xfffff800 - { - /* - * config_value being 0xFFFF_FFFF means that Linux is attempting to determine the ROM size. - * The value is used directly here because Linux will rewrite this register later, - * so the Hvisor does not need to preserve any additional state. 
- */ - Ok(Some( - dev.with_rom_ref(|rom| rom.get_size_with_flag()) as usize - )) - } else { - Ok(Some( - dev.with_config_value(|configvalue| configvalue.get_rom_value()) - as usize, - )) + if !gpm + .try_delete(old_vaddr.try_into().unwrap(), rom_size as usize) + .is_ok() + { + // warn!("delete rom bar: can not found 0x{:x}", old_vaddr); + } + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + rom_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + drop(guard); + /* after update gpm, mem barrier is needed + */ + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + /* after update gpm, need to flush iommu table + * in x86_64 + */ + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + #[cfg(target_arch = "riscv64")] + unsafe { + // TOOD: add remote fence support (using sbi rfence spec?) + core::arch::asm!("hfence.gvma"); + } + } else { + // ROM disabled + } + } + } + Ok(None) + } else { + // read rom bar + if (dev.with_config_value(|configvalue| configvalue.get_rom_value())) + & 0xfffff800 + == 0xfffff800 + { + /* + * config_value being 0xFFFF_FFFF means that Linux is attempting to determine the ROM size. + * The value is used directly here because Linux will rewrite this register later, + * so the Hvisor does not need to preserve any additional state. 
+ */ + Ok(Some( + dev.with_rom_ref(|rom| rom.get_size_with_flag()) as usize + )) + } else { + Ok(Some( + dev.with_config_value(|configvalue| configvalue.get_rom_value()) + as usize, + )) + } + } + } else { + Ok(None) + } + } + _ => Ok(None), + } +} + +fn handle_pci_bridge_access( + dev: ArcRwLockVirtualPciConfigSpace, + field: BridgeField, + value: usize, + is_write: bool, + is_direct: bool, + is_root: bool, + is_dev_belong_to_zone: bool, +) -> HvResult> { + match field { + BridgeField::Bar(slot) => { + let bar_type = dev.with_bar_ref(slot, |bar| bar.get_type()); + + // Check if this BAR contains MSIX table (only when dwc_msi feature is enabled) + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + let is_msix_bar = { + let msix_check_slot = if bar_type == PciMemType::Mem64High && slot > 0 { + slot - 1 + } else { + slot + }; + + dev.read() + .get_msi_info() + .and_then(|msi_info| { + msi_info + .msix_info + .as_ref() + .map(|msix| msix.bar_id == msix_check_slot as u8) + }) + .unwrap_or(false) + }; + + #[cfg(not(feature = "dwc_msi"))] + let is_msix_bar = false; + + if bar_type != PciMemType::default() { + if is_write { + if is_direct && is_root { + // direct mode and root zone, update resources directly + dev.with_config_value_mut(|configvalue| { + configvalue.set_bar_value(slot, value as u32); + }); + if (value & 0xfffffff0) != 0xfffffff0 { + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + value, + )?; + if (bar_type == PciMemType::Mem32) + | (bar_type == PciMemType::Mem64High) + | (bar_type == PciMemType::Io) + { + let old_vaddr = + dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; + let new_vaddr = { + if bar_type == PciMemType::Mem64High { + let low_value = dev + .with_config_value(|cv| cv.get_bar_value(slot - 1)) + as u64; + let high_value = (value as u32 as u64) << 32; + (low_value | high_value) & !0xf + } else { + (value as u64) & !0xf + } + }; + + // set virt_value + dev.with_bar_ref_mut(slot, |bar| 
bar.set_virtual_value(new_vaddr)); + if bar_type == PciMemType::Mem64High { + dev.with_bar_ref_mut(slot - 1, |bar| { + bar.set_virtual_value(new_vaddr) + }); + } + + // set value + dev.with_bar_ref_mut(slot, |bar| bar.set_value(new_vaddr)); + if bar_type == PciMemType::Mem64High { + dev.with_bar_ref_mut(slot - 1, |bar| bar.set_value(new_vaddr)); + } + + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } + + let bar_size = { + let size = dev.with_bar_ref(slot, |bar| bar.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + + if is_msix_bar { + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + guard.mmio_region_register( + new_vaddr_aligned as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + {} + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + } + drop(guard); + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + #[cfg(all(target_arch = "x86_64", 
feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + } + } + } else if is_dev_belong_to_zone { + // normal mode, update virt resources + dev.with_config_value_mut(|configvalue| { + configvalue.set_bar_value(slot, value as u32); + }); + if (value & 0xfffffff0) != 0xfffffff0 { + if (bar_type == PciMemType::Mem32) + | (bar_type == PciMemType::Mem64High) + | (bar_type == PciMemType::Io) + { + let old_vaddr = + dev.with_bar_ref(slot, |bar| bar.get_virtual_value64()) & !0xf; + let new_vaddr = { + if bar_type == PciMemType::Mem64High { + let low_value = dev + .with_config_value(|cv| cv.get_bar_value(slot - 1)) + as u64; + let high_value = (value as u32 as u64) << 32; + (low_value | high_value) & !0xf + } else { + (value as u64) & !0xf + } + }; + + dev.with_bar_ref_mut(slot, |bar| bar.set_virtual_value(new_vaddr)); + if bar_type == PciMemType::Mem64High { + dev.with_bar_ref_mut(slot - 1, |bar| { + bar.set_virtual_value(new_vaddr) + }); + } + + let paddr = { + let raw = dev.with_bar_ref(slot, |bar| bar.get_value64()) + as HostPhysAddr; + if bar_type == PciMemType::Io { + raw & !0x3 + } else { + raw & !0xf + } + }; + + if is_msix_bar { + dev.with_msi_info_mut(|msi_info| { + if let Some(msix) = msi_info.msix_info.as_mut() { + let msix_slot = if bar_type == PciMemType::Mem64High { + slot - 1 + } else { + slot + }; + if msix.bar_id as usize == msix_slot { + msix.bar_paddr = paddr as u64; + } + } + }); + } + let bar_size = { + let size = dev.with_bar_ref(slot, |bar| bar.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + + if is_msix_bar { + // Remove 
old MSIX handler if it exists + guard.mmio_region_remove(old_vaddr as GuestPhysAddr); + // Register new MSIX handler at new address + guard.mmio_region_register( + new_vaddr_aligned as GuestPhysAddr, + bar_size as usize, + mmio_msix_table_handler, + paddr as usize, + ); + } else { + // Delete old gpm mapping if it exists + let gpm = guard.gpm_mut(); + if !gpm + .try_delete( + old_vaddr.try_into().unwrap(), + bar_size as usize, + ) + .is_ok() + { + // warn!("delete bar {}: can not found 0x{:x}", slot, old_vaddr); + } + // Insert new gpm mapping at new address + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + bar_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + } + drop(guard); + /* after update gpm, mem barrier is needed + */ + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + /* after update gpm, need to flush iommu table + * in x86_64 + */ + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + } + } + } + Ok(None) + } else { + // read bar + if (dev.with_config_value(|configvalue| configvalue.get_bar_value(slot)) + & 0xfffffff0) + == 0xfffffff0 + { + /* + * tmp_value being 0xFFFF_FFFF means that Linux is attempting to determine the BAR size. + * The value of tmp_value is used directly here because Linux will rewrite this register later, + * so the Hvisor does not need to preserve any additional state. 
+ */ + Ok(Some( + dev.with_bar_ref(slot, |bar| bar.get_size_with_flag()) as usize + )) + } else { + Ok(Some( + dev.with_config_value(|configvalue| configvalue.get_bar_value(slot)) + as usize, + )) + } + } + } else { + Ok(None) + } + } + BridgeField::ExpansionRomBar => { + // rom is same with bar + let rom_type = dev.with_rom_ref(|rom| rom.get_type()); + if rom_type == PciMemType::Rom { + if is_write { + if is_direct && is_root { + dev.with_config_value_mut(|configvalue| { + configvalue.set_rom_value(value as u32); + }); + if value & 0xfffff800 != 0xfffff800 { + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + value, + )?; + + let new_vaddr = (value as u64) & !0xf; + + // set virt_value + dev.with_rom_ref_mut(|rom| rom.set_virtual_value(new_vaddr)); + + // set value + dev.with_rom_ref_mut(|rom| rom.set_value(new_vaddr)); + } + } else if is_dev_belong_to_zone { + // normal mode, update virt resources + dev.with_config_value_mut(|configvalue| { + configvalue.set_rom_value(value as u32); + }); + + // Check if this is size probe (all 1s in BA field, bits 31-11) + let is_size_probe = (value & 0xfffff800) == 0xfffff800; + // Check if ROM enable bit (bit 0) is set + let rom_enabled = (value & 0x1) != 0; + + if !is_size_probe { + let old_vaddr = + dev.with_rom_ref(|rom| rom.get_virtual_value64()) & !0xf; + let new_vaddr = (value as u64) & !0xf; + + // Only perform mapping operations if ROM enable bit is set + if rom_enabled { + // set new_value not new_vaddr, because `set_virtual_value` will not add enable flag automatically + dev.with_rom_ref_mut(|rom| rom.set_virtual_value(value as _)); + + // Write to hardware with enable bit set + // Get the current ROM value from hardware and set bit 0 + // And not to use rom.set_value() + let hw_value = dev.with_rom_ref(|rom| rom.get_value64()); + let hw_value_enabled = hw_value | 0x1; // Set enable bit + dev.write_hw( + field.to_offset() as PciConfigAddress, + field.size(), + hw_value_enabled as usize, + )?; + 
dev.with_rom_ref_mut(|rom| rom.set_value(hw_value_enabled)); + + let paddr = + dev.with_rom_ref(|rom| rom.get_value64()) as HostPhysAddr; + + let rom_size = { + let size = dev.with_rom_ref(|rom| rom.get_size()); + if crate::memory::addr::is_aligned(size as usize) { + size + } else { + crate::memory::PAGE_SIZE as u64 + } + }; + let new_vaddr_aligned = + if !crate::memory::addr::is_aligned(new_vaddr as usize) { + crate::memory::addr::align_up(new_vaddr as usize) as u64 + } else { + new_vaddr as u64 + }; + + let zone = this_zone(); + let mut guard = zone.write(); + let gpm = guard.gpm_mut(); + + if !gpm + .try_delete(old_vaddr.try_into().unwrap(), rom_size as usize) + .is_ok() + { + // warn!("delete rom bar: can not found 0x{:x}", old_vaddr); + } + gpm.try_insert_quiet(MemoryRegion::new_with_offset_mapper( + new_vaddr_aligned as GuestPhysAddr, + paddr as HostPhysAddr, + rom_size as _, + MemFlags::READ | MemFlags::WRITE, + ))?; + drop(guard); + /* after update gpm, mem barrier is needed + */ + #[cfg(target_arch = "aarch64")] + unsafe { + core::arch::asm!("isb"); + core::arch::asm!("tlbi vmalls12e1is"); + core::arch::asm!("dsb nsh"); + } + /* after update gpm, need to flush iommu table + * in x86_64 + */ + #[cfg(all(target_arch = "x86_64", feature = "intel_vtd"))] + { + let vbdf = dev.get_vbdf(); + crate::device::iommu::flush( + this_zone_id(), + vbdf.bus, + (vbdf.device << 3) + vbdf.function, + ); + } + #[cfg(target_arch = "riscv64")] + unsafe { + // TOOD: add remote fence support (using sbi rfence spec?) + core::arch::asm!("hfence.gvma"); + } + } else { + // ROM disabled + } + } + } + Ok(None) + } else { + // read rom bar + if (dev.with_config_value(|configvalue| configvalue.get_rom_value())) + & 0xfffff800 + == 0xfffff800 + { + /* + * config_value being 0xFFFF_FFFF means that Linux is attempting to determine the ROM size. 
+ * The value is used directly here because Linux will rewrite this register later, + * so the Hvisor does not need to preserve any additional state. + */ + Ok(Some( + dev.with_rom_ref(|rom| rom.get_size_with_flag()) as usize + )) + } else { + Ok(Some( + dev.with_config_value(|configvalue| configvalue.get_rom_value()) + as usize, + )) } } } else { @@ -583,14 +1610,6 @@ fn handle_endpoint_access( } } -fn handle_pci_bridge_access( - _dev: ArcRwLockVirtualPciConfigSpace, - _field: BridgeField, - _is_write: bool, -) -> HvResult> { - Ok(None) -} - /* * is_direct: if true, root can allocate resource for device belonging * to ohter zone but can't drive it @@ -622,6 +1641,13 @@ fn handle_config_space_access( let vbdf = dev.get_bdf(); let dev_type = dev.get_dev_type(); + if !is_root && dev.read().get_sriov_vf_info().is_some() { + if offset == 0x100 { + mmio.value = 0x0; + return Ok(()); + } + } + if is_root || is_dev_belong_to_zone { match dev.access(offset, size) { false => { @@ -661,25 +1687,63 @@ fn handle_config_space_access( let config_type = dev.get_config_type(); match config_type { HeaderType::Endpoint => { - if let Some(val) = handle_endpoint_access( - dev, - EndpointField::from(offset as usize, size), - value, - is_write, - is_direct, - is_root, - is_dev_belong_to_zone, - )? { - mmio.value = val; + // Check if this is capability region access (offset >= 0x40) + if (offset >= 0x40 && offset < 0x100) + || (offset == 0x34) + || (offset >= 0x100) + { + if let Some(val) = handle_cap_access( + dev, + offset, + size, + value, + is_write, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } + } else { + if let Some(val) = handle_endpoint_access( + dev, + EndpointField::from(offset as usize, size), + value, + is_write, + is_direct, + is_root, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } } } HeaderType::PciBridge => { - if let Some(val) = handle_pci_bridge_access( - dev, - BridgeField::from(offset as usize, size), - is_write, - )? 
{ - mmio.value = val; + // Check if this is capability region access (offset >= 0x40) + if (offset >= 0x40 && offset < 0x100) + || (offset == 0x34) + || (offset >= 0x100) + { + if let Some(val) = handle_cap_access( + dev, + offset, + size, + value, + is_write, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } + } else { + if let Some(val) = handle_pci_bridge_access( + dev, + BridgeField::from(offset as usize, size), + value, + is_write, + is_direct, + is_root, + is_dev_belong_to_zone, + )? { + mmio.value = val; + } } } _ => { @@ -825,10 +1889,53 @@ pub fn mmio_dwc_cfg_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { let dbi_region = PciRegionMmio::new(dbi_base, dbi_size); let dbi_backend = DwcConfigRegionBackend::new(dbi_region); - // warn!("atu config {:#?}", atu); + let pci_target = atu.pci_target(); + let target_bus = ((pci_target >> 24) & 0xff) as u8; + let target_device = ((pci_target >> 19) & 0x1f) as u8; + let target_function = ((pci_target >> 16) & 0x7) as u8; + + let mapped_target = { + let zone_guard = zone.read(); + let vbus = zone_guard.vpci_bus(); + vbus.devs_ref().values().find_map(|dev| { + let vbdf = dev.get_vbdf(); + if vbdf.bus() == target_bus + && vbdf.device() == target_device + && vbdf.function() == target_function + { + Some((dev.get_bdf(), dev.get_parent_bus())) + } else { + None + } + }) + }; + + let mut hw_pci_target = pci_target; + let mut atu_type = atu.atu_type(); + let mut config_base = atu.cpu_base(); + let mut cpu_limit = atu.cpu_limit(); + if let Some((host_bdf, parent_bus)) = mapped_target { + hw_pci_target = ((host_bdf.bus() as u64) << 24) + + ((host_bdf.device() as u64) << 19) + + ((host_bdf.function() as u64) << 16); + (config_base, atu_type) = if parent_bus == 0 { + (extend_config.cfg_base, AtuType::Cfg0) + } else { + ( + extend_config.cfg_base + (extend_config.cfg_size / 2), + AtuType::Cfg1, + ) + }; + cpu_limit = config_base + (extend_config.cfg_size / 2) - 1; + } - // Call AtuUnroll to program the ATU - 
AtuUnroll::dw_pcie_prog_outbound_atu_unroll(&dbi_backend, &atu)?; + // Program hardware ATU with translated host target when remap exists. + let mut hw_atu = atu; + hw_atu.set_pci_target(hw_pci_target); + hw_atu.set_atu_type(atu_type); + hw_atu.set_cpu_base(config_base); + hw_atu.set_cpu_limit(cpu_limit); + AtuUnroll::dw_pcie_prog_outbound_atu_unroll(&dbi_backend, &hw_atu)?; } let offset = (mmio.address & 0xfff) as PciConfigAddress; @@ -872,7 +1979,14 @@ pub fn mmio_dwc_cfg_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { let is_root = is_this_root_zone(); let is_direct = true; // dwc_cfg_handler uses direct mode - handle_config_space_access(dev, mmio, offset, is_direct, is_root, is_dev_belong_to_zone)?; + handle_config_space_access( + dev.clone(), + mmio, + offset, + is_direct, + is_root, + is_dev_belong_to_zone, + )?; } else { warn!("No ATU config yet, do nothing"); } @@ -883,180 +1997,542 @@ pub fn mmio_dwc_cfg_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { pub fn mmio_vpci_handler_dbi(mmio: &mut MMIOAccess, _base: usize) -> HvResult { // info!("mmio_vpci_handler_dbi {:#x}", mmio.address); - /* 0x0-0x100 is outbound atu0 reg - * 0x100-0x200 is inbound atu0 reg just handle outbound right now - * so MAX is ATU_BASE + ATU_REGION_SIZE/2 - */ - if mmio.address >= ATU_BASE && mmio.address < ATU_BASE + ATU_REGION_SIZE / 2 { - let zone = this_zone(); - let mut guard = zone.write(); - let ecam_base = _base; - let atu_offset = mmio.address - ATU_BASE; + use crate::platform; + + // Decode domain_id and ecam_base from arg: + // arg = ecam_base + domain_id + // Since ecam_base is 4KB aligned (low 12 bits are 0), + // low bits contain domain_id, high bits contain ecam_base + let domain_id = (_base & 0xF) as u8; + let ecam_base = _base - (domain_id as usize); + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + { + // Delay mode semantics: + // - Before init-done, accesses to non-zero DBI regs are normally passed through. 
+ // - For dwc_msi, MSI_ADDR_LO/HI are intercepted early so VM doorbell writes are cached. + // - Access to DBI reg 0 triggers hvisor PCI init, then normal DBI virtualization continues. + if !is_pci_init_done(domain_id) { + if mmio.address != 0 { + #[cfg(feature = "dwc_msi")] + match mmio.address { + PCIE_MSI_ADDR_LO | PCIE_MSI_ADDR_HI => { + let zone = this_zone(); + let mut guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + + if vbus.domain_msi_info().get(&domain_id).is_none() { + vbus.add_msi_count_for_domain(domain_id, 1, 0); + } + + if let Some(domain_msi_info) = + vbus.domain_msi_info_mut().get_mut(&domain_id) + { + if mmio.is_write { + let vm_doorbell = domain_msi_info.get_vm_doorbell(); + let new_val = if mmio.address == PCIE_MSI_ADDR_LO { + (vm_doorbell & 0xffffffff00000000) | (mmio.value as u64) + } else { + (vm_doorbell & 0xffffffff) | ((mmio.value as u64) << 32) + }; + domain_msi_info.set_vm_doorbell(new_val); + } else { + let vm_doorbell = domain_msi_info.get_vm_doorbell(); + mmio.value = if mmio.address == PCIE_MSI_ADDR_LO { + (vm_doorbell & 0xffffffff) as usize + } else { + ((vm_doorbell >> 32) & 0xffffffff) as usize + }; + } + } + + return Ok(()); + } + _ => {} + } + + mmio_perform_access(ecam_base, mmio); + return Ok(()); + } + + let root_config = platform::platform_root_zone_config(); + let num_pci_bus = root_config.num_pci_bus as usize; + + crate::pci::pci_config::hvisor_pci_init(&root_config.pci_config[..num_pci_bus])?; + + let zone = crate::zone::root_zone(); + let mut inner = zone.write(); + inner.virtual_pci_mmio_init_delay(&root_config.pci_config, num_pci_bus); + inner.guest_pci_init_delay( + 0, + &root_config.alloc_pci_devs, + root_config.num_pci_devs, + &root_config.pci_config, + num_pci_bus, + )?; + + #[cfg(feature = "dwc_msi")] + { + // Why this is inside init-delay only: + // before init-done, VM may have already written MSI_ADDR_LO/HI and those writes were + // cached (virtual doorbell) but did not program final hardware state. 
+ // After hvisor_pci_init() completes, force HW LO/HI to hvisor-allocated doorbell. + // In non-delay mode, writes go through the normal MSI register handler below, + // and first LO/HI writes are translated/synced there, so this extra sync is unnecessary. + let hw_paddr = crate::pci::dwc_msi::get_domain_doorbell_paddr(domain_id); + if hw_paddr != 0 { + let mut hw_lo_write = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: (hw_paddr & 0xffffffff) as usize, + size: 4, + is_write: true, + }; + let mut hw_hi_write = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: ((hw_paddr >> 32) & 0xffffffff) as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_lo_write); + mmio_perform_access(ecam_base, &mut hw_hi_write); + } + } + + set_pci_init_done(domain_id); + info!( + "Hvisor PCI initialization complete for domain {}", + domain_id + ); + } + } + + // Read extend_config to get io_atu_index + let extend_config = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|cfg| cfg.ecam_base == ecam_base as u64); + + if let Some(extend_config) = extend_config { + let io_atu_index = extend_config.io_atu_index as usize; + let atu_base = ATU_BASE + io_atu_index * ATU_REGION_SIZE; + + /* Calculate outbound atu registers address range based on io_atu_index + * Each ATU has: 0x0-0x100 for outbound, 0x100-0x200 for inbound + * We only handle outbound now, so MAX is atu_base + ATU_REGION_SIZE/2 + */ + if mmio.address >= atu_base && mmio.address < atu_base + ATU_REGION_SIZE / 2 { + let zone = this_zone(); + let mut guard = zone.write(); + let atu_offset = mmio.address - atu_base; - // warn!("set atu0 register {:#X} value {:#X}", atu_offset, mmio.value); + // warn!("set atu{} register {:#X} value {:#X}", io_atu_index, atu_offset, mmio.value); - let atu = guard - .atu_configs_mut() - .get_atu_by_ecam_mut(ecam_base) - .unwrap(); + let atu = guard + .atu_configs_mut() + .get_atu_by_ecam_mut(ecam_base) + .unwrap(); - // info!("atu config write {:#?}", atu); + // 
info!("atu config write {:#?}", atu); - if mmio.is_write { - if mmio.size == 4 { + if mmio.is_write { + if mmio.size == 4 { + match atu_offset { + PCIE_ATU_UNR_REGION_CTRL1 => { + // info!("set atu{} region ctrl1 value {:#X}", io_atu_index, mmio.value); + atu.set_atu_type(AtuType::from_u8((mmio.value & 0xff) as u8)); + } + PCIE_ATU_UNR_REGION_CTRL2 => { + // Enable bit is written here, but we just track it + // The actual enable is handled by the driver + } + PCIE_ATU_UNR_LOWER_BASE => { + // info!("set atu{} lower base value {:#X}", io_atu_index, mmio.value); + atu.set_cpu_base( + (atu.cpu_base() & !0xffffffff) | (mmio.value as PciConfigAddress), + ); + } + PCIE_ATU_UNR_UPPER_BASE => { + // info!("set atu{} upper base value {:#X}", io_atu_index, mmio.value); + atu.set_cpu_base( + (atu.cpu_base() & 0xffffffff) + | ((mmio.value as PciConfigAddress) << 32), + ); + } + PCIE_ATU_UNR_LIMIT => { + // info!("set atu{} limit value {:#X}", io_atu_index, mmio.value); + atu.set_cpu_limit( + (atu.cpu_limit() & !0xffffffff) | (mmio.value as PciConfigAddress), + ); + } + PCIE_ATU_UNR_UPPER_LIMIT => { + // Update the upper 32 bits of cpu_limit + atu.set_cpu_limit( + (atu.cpu_limit() & 0xffffffff) + | ((mmio.value as PciConfigAddress) << 32), + ); + } + PCIE_ATU_UNR_LOWER_TARGET => { + // info!("set atu{} lower target value {:#X}", io_atu_index, mmio.value); + atu.set_pci_target( + (atu.pci_target() & !0xffffffff) | (mmio.value as PciConfigAddress), + ); + } + PCIE_ATU_UNR_UPPER_TARGET => { + // info!("set atu{} upper target value {:#X}", io_atu_index, mmio.value); + atu.set_pci_target( + (atu.pci_target() & 0xffffffff) + | ((mmio.value as PciConfigAddress) << 32), + ); + } + _ => { + warn!( + "invalid atu{} write {:#x} + {:#x}", + io_atu_index, atu_offset, mmio.size + ); + } + } + } else { + warn!("invalid atu{} read size {:#x}", io_atu_index, mmio.size); + } + } else { + // Read from virtual ATU + // warn!("read atu{} {:#x}", io_atu_index, atu_offset); match atu_offset { 
PCIE_ATU_UNR_REGION_CTRL1 => { - // info!("set atu0 region ctrl1 value {:#X}", mmio.value); - atu.set_atu_type(AtuType::from_u8((mmio.value & 0xff) as u8)); + mmio.value = atu.atu_type() as usize; } PCIE_ATU_UNR_REGION_CTRL2 => { - // Enable bit is written here, but we just track it - // The actual enable is handled by the driver + mmio.value = ATU_ENABLE_BIT as usize; } PCIE_ATU_UNR_LOWER_BASE => { - // info!("set atu0 lower base value {:#X}", mmio.value); - atu.set_cpu_base( - (atu.cpu_base() & !0xffffffff) | (mmio.value as PciConfigAddress), - ); + mmio.value = (atu.cpu_base() & 0xffffffff) as usize; } PCIE_ATU_UNR_UPPER_BASE => { - // info!("set atu0 upper base value {:#X}", mmio.value); - atu.set_cpu_base( - (atu.cpu_base() & 0xffffffff) - | ((mmio.value as PciConfigAddress) << 32), - ); + mmio.value = ((atu.cpu_base() >> 32) & 0xffffffff) as usize; } PCIE_ATU_UNR_LIMIT => { - // info!("set atu0 limit value {:#X}", mmio.value); - atu.set_cpu_limit( - (atu.cpu_limit() & !0xffffffff) | (mmio.value as PciConfigAddress), - ); + let limit_value = (atu.cpu_limit() & 0xffffffff) as usize; + mmio.value = if limit_value == 0 { + atu.limit_hw_value() as usize + } else { + limit_value + }; } PCIE_ATU_UNR_UPPER_LIMIT => { - // Update the upper 32 bits of cpu_limit - atu.set_cpu_limit( - (atu.cpu_limit() & 0xffffffff) - | ((mmio.value as PciConfigAddress) << 32), - ); + let upper_limit = ((atu.cpu_limit() >> 32) & 0xffffffff) as usize; + mmio.value = if upper_limit == 0xffffffff { + atu.upper_limit_hw_value() as usize + } else { + upper_limit + }; } PCIE_ATU_UNR_LOWER_TARGET => { - // info!("set atu0 lower target value {:#X}", mmio.value); - atu.set_pci_target( - (atu.pci_target() & !0xffffffff) | (mmio.value as PciConfigAddress), - ); + mmio.value = (atu.pci_target() & 0xffffffff) as usize; } PCIE_ATU_UNR_UPPER_TARGET => { - // info!("set atu0 upper target value {:#X}", mmio.value); - atu.set_pci_target( - (atu.pci_target() & 0xffffffff) - | ((mmio.value as 
PciConfigAddress) << 32), - ); + mmio.value = ((atu.pci_target() >> 32) & 0xffffffff) as usize; } _ => { - warn!("invalid atu0 write {:#x} + {:#x}", atu_offset, mmio.size); + warn!("invalid atu{} read {:#x}", io_atu_index, atu_offset); + mmio_perform_access(ecam_base, mmio); } } - } else { - warn!("invalid atu0 read size {:#x}", mmio.size); - } - } else { - // Read from virtual ATU - // warn!("read atu0 {:#x}", atu_offset); - match atu_offset { - PCIE_ATU_UNR_REGION_CTRL1 => { - mmio.value = atu.atu_type() as usize; - } - PCIE_ATU_UNR_REGION_CTRL2 => { - mmio.value = ATU_ENABLE_BIT as usize; - } - PCIE_ATU_UNR_LOWER_BASE => { - mmio.value = (atu.cpu_base() & 0xffffffff) as usize; - } - PCIE_ATU_UNR_UPPER_BASE => { - mmio.value = ((atu.cpu_base() >> 32) & 0xffffffff) as usize; - } - PCIE_ATU_UNR_LIMIT => { - let limit_value = (atu.cpu_limit() & 0xffffffff) as usize; - mmio.value = if limit_value == 0 { - atu.limit_hw_value() as usize - } else { - limit_value - }; - } - PCIE_ATU_UNR_UPPER_LIMIT => { - let upper_limit = ((atu.cpu_limit() >> 32) & 0xffffffff) as usize; - mmio.value = if upper_limit == 0xffffffff { - atu.upper_limit_hw_value() as usize - } else { - upper_limit - }; - } - PCIE_ATU_UNR_LOWER_TARGET => { - mmio.value = (atu.pci_target() & 0xffffffff) as usize; - } - PCIE_ATU_UNR_UPPER_TARGET => { - mmio.value = ((atu.pci_target() >> 32) & 0xffffffff) as usize; - } - _ => { - warn!("invalid atu0 read {:#x}", atu_offset); - mmio_perform_access(_base, mmio); - } } - } - } else if mmio.address > ATU_BASE + ATU_REGION_SIZE / 2 { - mmio_perform_access(_base, mmio); - } else if mmio.address >= BIT_LENTH { - // dbi read - mmio_perform_access(_base, mmio); - } else { - warn!("mmio_vpci_handler_dbi read {:#x}", mmio.address); - let offset = (mmio.address & 0xfff) as PciConfigAddress; - let zone = this_zone(); - let mut is_dev_belong_to_zone = false; + } else if mmio.address > ATU_BASE { + mmio_perform_access(ecam_base, mmio); + } else if mmio.address >= BIT_LENTH + &&
!(mmio.address >= PCIE_MSI_ADDR_LO && mmio.address <= PCIE_MSI_INTR0_STATUS) + { + // dbi read + mmio_perform_access(ecam_base, mmio); + } else if mmio.address >= PCIE_MSI_ADDR_LO && mmio.address <= PCIE_MSI_INTR0_STATUS { + // Handle MSI registers - virtuize only if dwc_msi feature enabled + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + // Non-delay path (or delay after init-done) uses this handler for MSI DBI regs. + // LO/HI writes are virtualized and synchronized with hvisor-managed doorbell here. + // Handle MSI registers + let dbi_offset = mmio.address; + let zone = this_zone(); + + let mut guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + + if let Some(domain_msi_info) = vbus.domain_msi_info_mut().get_mut(&domain_id) { + match dbi_offset { + PCIE_MSI_ADDR_LO => { + if mmio.is_write { + // VM writes low 32 bits of doorbell address + let new_doorbell = (domain_msi_info.get_vm_doorbell() + & 0xffffffff00000000) + | (mmio.value as u64); + domain_msi_info.set_vm_doorbell(new_doorbell); + + // Check if hardware doorbell matches hvisor's allocation from DW_MSI_DOMAINS + // Read current hardware ADDR_LO and ADDR_HI to get full doorbell address + let mut hw_hi_mmio = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: 0, + size: 4, + is_write: false, + }; + // After VM writes LO, hardware still has old LO value + // We'll use the new LO from VM write and existing HI from hardware + mmio_perform_access(ecam_base, &mut hw_hi_mmio); + let hw_doorbell = + ((hw_hi_mmio.value as u64) << 32) | (mmio.value as u64); + + // Get the authoritative doorbell from DW_MSI_DOMAINS + // Actually vm set the doorbell only when this board doesn't support arch MSI + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(domain_id); + + // If hardware doorbell doesn't match hvisor's allocation, sync it + if hw_doorbell != hw_paddr && hw_paddr != 0 { + let hw_paddr_lo = (hw_paddr & 0xffffffff) as u32; + let hw_paddr_hi = ((hw_paddr >> 32) & 0xffffffff) as 
u32; + + // Write hvisor's doorbell LO + let mut hw_lo_write = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: hw_paddr_lo as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_lo_write); + + // Write hvisor's doorbell HI (only if needed) + if hw_paddr_hi != (hw_hi_mmio.value as u32) { + let mut hw_hi_write = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: hw_paddr_hi as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_hi_write); + } + } + } else { + // Return the low 32 bits of VM doorbell + mmio.value = + (domain_msi_info.get_vm_doorbell() & 0xffffffff) as usize; + } + } + PCIE_MSI_ADDR_HI => { + if mmio.is_write { + // VM writes high 32 bits of doorbell address + let new_doorbell = (domain_msi_info.get_vm_doorbell() & 0xffffffff) + | ((mmio.value as u64) << 32); + domain_msi_info.set_vm_doorbell(new_doorbell); + + // Check if hardware doorbell matches hvisor's allocation from DW_MSI_DOMAINS + // Read current hardware ADDR_LO and ADDR_HI to get full doorbell address + let mut hw_lo_mmio = MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: 0, + size: 4, + is_write: false, + }; + mmio_perform_access(ecam_base, &mut hw_lo_mmio); + let hw_doorbell = + ((mmio.value as u64) << 32) | (hw_lo_mmio.value as u64); + + // Get the authoritative doorbell from DW_MSI_DOMAINS + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(domain_id); + + // If hardware doorbell doesn't match hvisor's allocation, sync it + if hw_doorbell != hw_paddr && hw_paddr != 0 { + let hw_paddr_lo = (hw_paddr & 0xffffffff) as u32; + let hw_paddr_hi = ((hw_paddr >> 32) & 0xffffffff) as u32; + + // Write hvisor's doorbell HI + let mut hw_hi_write = MMIOAccess { + address: PCIE_MSI_ADDR_HI, + value: hw_paddr_hi as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_hi_write); + + // Write hvisor's doorbell LO (only if needed) + if hw_paddr_lo != (hw_lo_mmio.value as u32) { + let mut hw_lo_write 
= MMIOAccess { + address: PCIE_MSI_ADDR_LO, + value: hw_paddr_lo as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_lo_write); + } + } + } else { + // Return the high 32 bits of VM doorbell + mmio.value = ((domain_msi_info.get_vm_doorbell() >> 32) + & 0xffffffff) + as usize; + } + } + PCIE_MSI_INTR0_ENABLE | PCIE_MSI_INTR0_MASK | PCIE_MSI_INTR0_STATUS => { + // All three registers use the same bit shifting and masking logic + let hwirq_bit = domain_msi_info.hwirq_bit; + let vm_mask = domain_msi_info.get_msi_mask(); + + if mmio.is_write { + // VM writes from virqbit 0-based perspective + // Convert to hardware perspective by left-shifting by hwirq_bit + let hw_value_vm = + (mmio.value as u32 & vm_mask).wrapping_shl(hwirq_bit); + + if dbi_offset == PCIE_MSI_INTR0_STATUS { + // Status register: write 1 to clear semantics + // Mask first to ensure VM can only clear its own bits + // No need to read hardware value - just write the mapped bits + // Hardware will clear only the bits we write as 1 + // Other domains' pending interrupts remain unaffected + let mut hw_mmio_write = MMIOAccess { + address: mmio.address, + value: hw_value_vm as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_mmio_write); + } else { + // For ENABLE/MASK registers: need to preserve other domain's bits + // Read current hardware value + let mut hw_mmio = MMIOAccess { + address: mmio.address, + value: 0, + size: 4, + is_write: false, + }; + mmio_perform_access(ecam_base, &mut hw_mmio); + let hw_value = hw_mmio.value as u32; - let base = mmio.address as PciConfigAddress - offset + _base as PciConfigAddress; + // Create mask for this domain's MSI bits + let domain_mask = vm_mask.wrapping_shl(hwirq_bit); - let dev: Option = { - let mut guard = zone.write(); - let vbus = guard.vpci_bus_mut(); - if let Some(dev) = vbus.get_device_by_base(base) { - is_dev_belong_to_zone = true; - Some(dev) - } else { - drop(guard); - // Clone Arc first 
while holding GLOBAL_PCIE_LIST lock, then release it - // This avoids holding multiple locks simultaneously - let dev_clone = { - let global_pcie_list = GLOBAL_PCIE_LIST.lock(); - global_pcie_list - .values() - .find(|dev| { - let dev_guard = dev.read(); - dev_guard.get_base() == base - }) - .cloned() - }; - dev_clone - } - }; + // Update hardware value: clear domain bits, then set new ones + let new_hw_value = + (hw_value & !domain_mask) | (hw_value_vm & domain_mask); - let dev = match dev { - Some(dev) => dev, - None => { - handle_device_not_found(mmio, offset); - return Ok(()); + let mut hw_mmio_write = MMIOAccess { + address: mmio.address, + value: new_hw_value as usize, + size: 4, + is_write: true, + }; + mmio_perform_access(ecam_base, &mut hw_mmio_write); + } + } else { + // Read and convert from hardware perspective to VM perspective + // Read hardware value + let mut hw_mmio = MMIOAccess { + address: mmio.address, + value: 0, + size: 4, + is_write: false, + }; + mmio_perform_access(ecam_base, &mut hw_mmio); + let hw_value = hw_mmio.value as u32; + + // Right shift to get VM perspective and mask + let vm_value = hw_value.wrapping_shr(hwirq_bit) & vm_mask; + mmio.value = vm_value as usize; + } + } + _ => { + // Other DBI registers + mmio_perform_access(ecam_base, mmio); + } + } + } else { + warn!("No MSI domain info found for domain {}", domain_id); + mmio_perform_access(ecam_base, mmio); + } } - }; - let is_root = is_this_root_zone(); - let is_direct = true; // dbi handler uses direct mode + #[cfg(not(feature = "dwc_msi"))] + { + // Without dwc_msi feature, directly pass through MSI register access + mmio_perform_access(ecam_base, mmio); + } + } else { + // warn!("mmio_vpci_handler_dbi read {:#x}", mmio.address); + let offset = (mmio.address & 0xfff) as PciConfigAddress; + let zone = this_zone(); + let mut is_dev_belong_to_zone = false; + + let base = mmio.address as PciConfigAddress - offset + ecam_base as PciConfigAddress; + + let dev: Option = { + let mut 
guard = zone.write(); + let vbus = guard.vpci_bus_mut(); + if let Some(dev) = vbus.get_device_by_base(base) { + is_dev_belong_to_zone = true; + Some(dev) + } else { + drop(guard); + // Clone Arc first while holding GLOBAL_PCIE_LIST lock, then release it + // This avoids holding multiple locks simultaneously + let dev_clone = { + let global_pcie_list = GLOBAL_PCIE_LIST.lock(); + global_pcie_list + .values() + .find(|dev| { + let dev_guard = dev.read(); + dev_guard.get_base() == base + }) + .cloned() + }; + dev_clone + } + }; - handle_config_space_access(dev, mmio, offset, is_direct, is_root, is_dev_belong_to_zone)?; + let dev = match dev { + Some(dev) => dev, + None => { + handle_device_not_found(mmio, offset); + return Ok(()); + } + }; + + let is_root = is_this_root_zone(); + let is_direct = true; // dbi handler uses direct mode + + handle_config_space_access( + dev, + mmio, + offset, + is_direct, + is_root, + is_dev_belong_to_zone, + )?; + } + } else { + warn!("No extend config found for ecam_base {:#x}", ecam_base); } Ok(()) } +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +static DBI_PCI_INIT_DONE: Lazy<Mutex<BTreeMap<u8, bool>>> = + Lazy::new(|| Mutex::new(BTreeMap::new())); + +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +pub fn is_pci_init_done(domain_id: u8) -> bool { + DBI_PCI_INIT_DONE + .lock() + .get(&domain_id) + .copied() + .unwrap_or(false) +} + +#[cfg(all(feature = "dwc_pcie", feature = "pci_init_delay"))] +fn set_pci_init_done(domain_id: u8) { + DBI_PCI_INIT_DONE.lock().insert(domain_id, true); +} + pub fn mmio_vpci_direct_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult { let zone = this_zone(); let offset = (mmio.address & 0xfff) as PciConfigAddress; @@ -1094,3 +2570,192 @@ pub fn mmio_vpci_direct_handler(mmio: &mut MMIOAccess, _base: usize) -> HvResult Ok(()) } + +/// Handle MMIO access to MSIX table in BAR memory +pub fn mmio_msix_table_handler(mmio: &mut MMIOAccess, base: usize) -> HvResult { + let access_offset = mmio.address as
u64; + let base_aligned = (base as u64) & !0xf; + + // Find the device matching this BAR's physical address and get domain_id from BDF + let (device_info, _domain_id) = { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + + // Find the device whose MSIX BAR paddr matches the handler base + let mut result = None; + let mut domain_id = 0xFF; + for dev in vbus.devs_ref().values() { + if let Some(msi_info) = dev.read().get_msi_info() { + if let Some(msix) = &msi_info.msix_info { + let msix_bar_aligned = msix.bar_paddr & !0xf; + if msix_bar_aligned == base_aligned { + // Get domain_id from device's BDF + domain_id = dev.read().get_bdf().domain(); + result = Some((dev.clone(), msix.offset, msix.entry_count)); + break; + } + } + } + } + + if result.is_none() { + panic!( + "MSIX table handler could not find device in current zone vPCI bus for BAR base {:#x}", + base_aligned + ); + } + (result, domain_id) + }; + + // Check if this access is within the MSIX table range + if let Some((dev, msix_offset, entry_count)) = device_info { + // let vbdf = dev.get_vbdf(); + + let msix_table_size = (entry_count as u64) * 16; // Each entry is 16 bytes + let msix_table_end = msix_offset + msix_table_size; + + if access_offset >= msix_offset && access_offset < msix_table_end { + // This is a MSIX table access, record it with detailed information + let offset_in_entry = access_offset - msix_offset; + // let entry_index = offset_in_entry / 16; + let field_offset = offset_in_entry % 16; + // let host_bdf = dev.get_bdf(); + // let field_name = match field_offset { + // 0..=3 => "msg_addr_lo", + // 4..=7 => "msg_addr_hi", + // 8..=11 => "msg_data", + // 12..=15 => "vector_ctrl", + // _ => "unknown", + // }; + + if mmio.is_write { + // let vm_value = mmio.value; + match field_offset { + 0..=3 => { + // Save VM's doorbell low 32 bits + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff00000000; + msi_info.set_doorbell(current 
| (mmio.value as u64)); + }); + + // Replace with hvisor's doorbell before writing to hardware + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + let hw_doorbell_lo = (hw_paddr & 0xffffffff) as usize; + mmio.value = hw_doorbell_lo; + } + } + } + 4..=7 => { + // Save VM's doorbell high 32 bits + dev.with_msi_info_mut(|msi_info| { + let current = msi_info.msi_doorbell & 0xffffffff; + msi_info.set_doorbell(current | ((mmio.value as u64) << 32)); + }); + + // Replace with hvisor's doorbell before writing to hardware + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let hw_paddr = + crate::pci::dwc_msi::get_domain_doorbell_paddr(_domain_id); + let hw_doorbell_hi = ((hw_paddr >> 32) & 0xffffffff) as usize; + mmio.value = hw_doorbell_hi; + } + } + } + 8..=11 => { + // Convert VM vector index to hardware vector index. + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = + vbus.domain_msi_info().get(&_domain_id) + { + let virq_bit = mmio.value as u32; + let hwirq_bit = domain_msi_info.hwirq_bit; + let hw_value = virq_bit.wrapping_add(hwirq_bit); + mmio.value = hw_value as usize; + } + } + } + } + 12..=15 => {} + _ => {} + } + + mmio_perform_access(base, mmio); + return Ok(()); + } else { + let mut hw_mmio = MMIOAccess { + address: mmio.address, + value: 0, + size: mmio.size, + is_write: false, + }; + mmio_perform_access(base, &mut hw_mmio); + let hw_value = hw_mmio.value; + + match field_offset { + 0..=3 => { + let dev_vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi| msi.msi_doorbell) + .unwrap_or(0); + mmio.value = (dev_vm_doorbell & 0xffffffff) as usize; + } + 4..=7 => { + let dev_vm_doorbell = dev + .read() + .get_msi_info() + .map(|msi| msi.msi_doorbell) + 
.unwrap_or(0); + mmio.value = ((dev_vm_doorbell >> 32) & 0xffffffff) as usize; + } + 8..=11 => { + mmio.value = hw_value; + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + if _domain_id != 0xFF { + let zone = this_zone(); + let guard = zone.read(); + let vbus = guard.vpci_bus(); + if let Some(domain_msi_info) = + vbus.domain_msi_info().get(&_domain_id) + { + let hwirq_bit = domain_msi_info.hwirq_bit; + let hw_vec = hw_value as u32; + let virq_bit = if hw_vec >= hwirq_bit { + hw_vec - hwirq_bit + } else { + hw_vec + }; + mmio.value = virq_bit as usize; + } + } + } + } + 12..=15 => { + mmio.value = hw_value; + } + _ => { + mmio.value = hw_value; + } + } + return Ok(()); + } + } + } + + mmio_perform_access(base, mmio); + + Ok(()) +} diff --git a/src/pci/pci_struct.rs b/src/pci/pci_struct.rs index ad243d1f..9ecefdda 100644 --- a/src/pci/pci_struct.rs +++ b/src/pci/pci_struct.rs @@ -17,7 +17,12 @@ use alloc::{collections::btree_map::BTreeMap, sync::Arc, vec::Vec}; use bit_field::BitField; use bitvec::{array::BitArray, order::Lsb0, BitArr}; -use core::{cmp::Ordering, fmt::Debug, ops::Range, str::FromStr}; +use core::{ + cmp::Ordering, + fmt::Debug, + ops::{Deref, DerefMut, Range}, + str::FromStr, +}; use spin::RwLock; use super::{ @@ -151,6 +156,18 @@ const PCI_EXP_TYPE_UPSTREAM: u16 = 5; const PCI_EXP_TYPE_DOWNSTREAM: u16 = 6; const PCI_EXP_TYPE_PCIE_BRIDGE: u16 = 8; +pub(crate) const SRIOV_CAP_SIZE: PciConfigAddress = 0x40; +const SRIOV_CTRL_OFFSET: PciConfigAddress = 0x08; +const SRIOV_INITIAL_VFS_OFFSET: PciConfigAddress = 0x0c; +const SRIOV_TOTAL_VFS_OFFSET: PciConfigAddress = 0x0e; +const SRIOV_NUM_VFS_OFFSET: PciConfigAddress = 0x10; +const SRIOV_FIRST_VF_OFFSET: PciConfigAddress = 0x14; +const SRIOV_VF_STRIDE_OFFSET: PciConfigAddress = 0x16; +const SRIOV_VF_DEVICE_ID_OFFSET: PciConfigAddress = 0x1a; +pub(crate) const SRIOV_VF_BAR_OFFSET: PciConfigAddress = 0x24; +pub(crate) const SRIOV_VF_BAR_END: PciConfigAddress = SRIOV_VF_BAR_OFFSET + 6 * 4; 
+const SRIOV_CTRL_VF_ENABLE: u16 = 1 << 0; + #[derive(Clone, Copy, Eq, PartialEq, Default)] pub struct Bdf { pub domain: u8, @@ -189,6 +206,25 @@ impl Bdf { self.function } + pub fn routing_id(&self) -> u16 { + ((self.bus as u16) << 8) | ((self.device as u16) << 3) | (self.function as u16) + } + + pub fn from_routing_id(domain: u8, routing_id: u16) -> Self { + Self { + domain, + bus: ((routing_id >> 8) & 0xff) as u8, + device: ((routing_id >> 3) & 0x1f) as u8, + function: (routing_id & 0x7) as u8, + } + } + + pub fn add_routing_id_offset(&self, offset: u16) -> Option { + self.routing_id() + .checked_add(offset) + .map(|routing_id| Self::from_routing_id(self.domain, routing_id)) + } + pub fn is_host_bridge(&self, bus_begin: u8) -> bool { if (self.bus, self.device, self.function) == (bus_begin, 0, 0) { true @@ -316,14 +352,19 @@ impl VirtualPciAccessBits { let mut bits = BitArray::ZERO; bits[0x0..0x4].fill(true); // ID bits[0x08..0x0c].fill(true); // CLASS - bits[0x10..0x34].fill(true); //bar and rom + bits[0x10..0x34].fill(true); // BARs and ROM + bits[0x34..0x38].fill(true); // Capability Pointer + bits[0x40..0x100].fill(true); // Capability region (caps start at 0x40) Self { bits } } pub fn bridge() -> Self { - Self { - bits: BitArray::ZERO, - } + let mut bits = BitArray::ZERO; + bits[0x10..0x18].fill(true); // BARs + bits[0x38..0x3c].fill(true); // ROM + bits[0x34..0x38].fill(true); // Capability Pointer + bits[0x40..0x100].fill(true); // Capability region (caps start at 0x40) + Self { bits } } pub fn host_bridge() -> Self { @@ -343,6 +384,106 @@ impl VirtualPciAccessBits { } } +#[derive(Clone, Copy, Debug)] +pub struct MsixInfo { + pub bar_id: u8, + pub offset: u64, + pub entry_count: u32, // number of MSIX table entries + pub bar_paddr: u64, // physical address of the BAR +} + +#[derive(Clone, Debug)] +pub struct MsiInfo { + pub msi_count: u32, + // doorbell vm write to trigger interrupt + pub msi_doorbell: u64, + pub msix_info: Option, +} + +impl MsiInfo { + 
pub fn new(msi_count: u32) -> Self { + Self { + msi_count, + msi_doorbell: 0, + msix_info: None, + } + } + + pub fn set_doorbell(&mut self, doorbell: u64) { + self.msi_doorbell = doorbell; + } + + pub fn set_msix_info(&mut self, bar_id: u8, offset: u64, entry_count: u32, bar_paddr: u64) { + self.msix_info = Some(MsixInfo { + bar_id, + offset, + entry_count, + bar_paddr, + }); + } +} + +#[derive(Clone, Copy, Debug)] +pub struct SriovVfInfo { + pub pf_bdf: Bdf, + pub vf_index: u16, +} + +#[derive(Clone, Debug)] +pub struct SriovInfo { + pub cap_offset: PciConfigAddress, + pub initial_vfs: u16, + pub total_vfs: u16, + pub enabled_vfs: u16, + pub first_vf_offset: u16, + pub vf_stride: u16, + pub vf_device_id: DeviceId, + pub vf_bars: Bar, + pub vf_bdfs: Vec, +} + +impl SriovInfo { + pub fn new( + cap_offset: PciConfigAddress, + initial_vfs: u16, + total_vfs: u16, + first_vf_offset: u16, + vf_stride: u16, + vf_device_id: DeviceId, + vf_bars: Bar, + vf_bdfs: Vec, + ) -> Self { + Self { + cap_offset, + initial_vfs, + total_vfs, + enabled_vfs: 0, + first_vf_offset, + vf_stride, + vf_device_id, + vf_bars, + vf_bdfs, + } + } +} + +/// Information needed to splice the SR-IOV extended capability out of the +/// PCIe ext-cap linked list when presenting config space to a guest VM. +/// Populated during `ext_capability_enumerate` when the `sriov` feature is +/// disabled so that the SR-IOV cap is invisible to guests. +#[derive(Clone, Debug)] +pub struct HideSriovInfo { + /// Absolute config-space offset of the SR-IOV extended capability header. + pub sriov_cap_offset: PciConfigAddress, + /// The `next` pointer stored inside the SR-IOV cap header (bits\[31:20\]). + /// This is the cap that should follow SR-IOV in the list. + pub sriov_cap_next: PciConfigAddress, + /// Offset of the preceding ext-cap node (the one whose `next` pointer + /// currently points to `sriov_cap_offset`). `None` when SR-IOV is the + /// first extended capability (at offset 0x100). 
+ pub prev_cap_offset: Option, +} + /* VirtualPciConfigSpace * bdf: the bdf hvisor seeing(same with the bdf without hvisor) * vbdf: the bdf zone seeing, it can set just you like without sr-iov @@ -355,6 +496,7 @@ impl VirtualPciAccessBits { pub struct VirtualPciConfigSpace { host_bdf: Bdf, parent_bdf: Bdf, + parent_bus: u8, bdf: Bdf, vbdf: Bdf, config_type: HeaderType, @@ -370,8 +512,21 @@ pub struct VirtualPciConfigSpace { bararr: Bar, rom: PciMem, capabilities: PciCapabilityList, + ext_capabilities: PciExtCapabilityList, dev_type: VpciDevType, + + // MSI/MSIX info for this device + msi_info: Option, + + // for SR-IOV PF + sriov_info: Option, + + // for SR-IOV VF + sriov_vf_info: Option, + + // SR-IOV cap hide info (populated when `sriov` feature is disabled) + hide_sriov: Option, msix_table: Option>>, } @@ -433,6 +588,14 @@ impl ArcRwLockVirtualPciConfigSpace { self.read().get_vbdf() } + pub fn get_parent_bdf(&self) -> Bdf { + self.read().get_parent_bdf() + } + + pub fn get_parent_bus(&self) -> u8 { + self.read().get_parent_bus() + } + pub fn get_dev_type(&self) -> VpciDevType { self.read().get_dev_type() } @@ -541,6 +704,46 @@ impl ArcRwLockVirtualPciConfigSpace { f(&guard.capabilities) } + pub fn with_msi_info(&self, f: F) -> Option + where + F: FnOnce(&MsiInfo) -> R, + { + let guard = self.0.read(); + guard.msi_info.as_ref().map(|msi_info| f(msi_info)) + } + + pub fn with_msi_info_mut(&self, f: F) -> Option + where + F: FnOnce(&mut MsiInfo) -> R, + { + let mut guard = self.0.write(); + guard.msi_info.as_mut().map(|msi_info| f(msi_info)) + } + + pub fn with_sriov_info(&self, f: F) -> Option + where + F: FnOnce(&SriovInfo) -> R, + { + let guard = self.0.read(); + guard.sriov_info.as_ref().map(|sriov_info| f(sriov_info)) + } + + pub fn with_sriov_info_mut(&self, f: F) -> Option + where + F: FnOnce(&mut SriovInfo) -> R, + { + let mut guard = self.0.write(); + guard.sriov_info.as_mut().map(|sriov_info| f(sriov_info)) + } + + pub fn with_hide_sriov(&self, f: F) -> 
Option + where + F: FnOnce(&HideSriovInfo) -> R, + { + let guard = self.0.read(); + guard.hide_sriov.as_ref().map(|info| f(info)) + } + pub fn read(&self) -> spin::RwLockReadGuard<'_, VirtualPciConfigSpaceWithZone> { self.0.read() } @@ -591,6 +794,207 @@ impl Debug for ArcRwLockVirtualPciConfigSpace { // } impl VirtualPciConfigSpace { + fn find_ext_cap_offset(&self, cap_type: ExtCapabilityType) -> Option { + self.ext_capabilities + .iter() + .find_map(|(offset, cap)| (cap.cap_type == cap_type).then_some(*offset)) + } + + fn parse_sriov_vf_bars(&self, cap_offset: PciConfigAddress) -> HvResult { + let mut bararr = Bar::default(); + let mut slot = 0usize; + + while slot < 6 { + let bar_offset = cap_offset + SRIOV_VF_BAR_OFFSET + (slot as PciConfigAddress) * 4; + let value = self.backend.read(bar_offset, 4)? as u32; + + if !value.get_bit(0) { + let prefetchable = value.get_bit(3); + + match value.get_bits(1..3) { + 0b00 => { + let size = { + self.backend.write(bar_offset, 4, 0xffff_ffffusize)?; + let mut readback = self.backend.read(bar_offset, 4)? as u32; + self.backend.write(bar_offset, 4, value as usize)?; + + if readback == 0 { + slot += 1; + continue; + } + + readback.set_bits(0..4, 0); + 1u64 << readback.trailing_zeros() + }; + + bararr[slot] = + PciMem::new_bar(PciMemType::Mem32, value as u64, size, prefetchable); + } + 0b10 => { + if slot == 5 { + warn!("SR-IOV VF BAR64 low part appears in BAR5"); + break; + } + + let high_offset = bar_offset + 4; + let value_high = self.backend.read(high_offset, 4)? as u32; + let size = { + self.backend.write(bar_offset, 4, 0xffff_ffffusize)?; + self.backend.write(high_offset, 4, 0xffff_ffffusize)?; + let mut readback_low = self.backend.read(bar_offset, 4)? as u32; + let readback_high = self.backend.read(high_offset, 4)? 
as u32; + self.backend.write(bar_offset, 4, value as usize)?; + self.backend.write(high_offset, 4, value_high as usize)?; + + readback_low.set_bits(0..4, 0); + + if readback_low != 0 { + 1u64 << readback_low.trailing_zeros() + } else { + 1u64 << (readback_high.trailing_zeros() + 32) + } + }; + let value64 = (value as u64) | ((value_high as u64) << 32); + + bararr[slot] = + PciMem::new_bar(PciMemType::Mem64Low, value64, size, prefetchable); + bararr[slot + 1] = + PciMem::new_bar(PciMemType::Mem64High, value64, size, prefetchable); + slot += 1; + } + _ => { + warn!( + "unsupported SR-IOV VF BAR type bits {:b}", + value.get_bits(1..3) + ); + } + } + } else { + let size = { + self.backend.write(bar_offset, 4, 0xffff_ffffusize)?; + let mut readback = self.backend.read(bar_offset, 4)? as u32; + self.backend.write(bar_offset, 4, value as usize)?; + + readback.set_bit(0, false); + if readback == 0 { + slot += 1; + continue; + } + + 1u64 << readback.trailing_zeros() + }; + bararr[slot] = PciMem::new_io(value as u64, size); + } + + slot += 1; + } + + Ok(bararr) + } + + pub fn build_sriov_info(&mut self) -> HvResult<()> { + let Some(cap_offset) = self.find_ext_cap_offset(ExtCapabilityType::SingleRootIov) else { + return Ok(()); + }; + + let initial_vfs = self + .backend + .read(cap_offset + SRIOV_INITIAL_VFS_OFFSET, 2)? as u16; + let total_vfs = self.backend.read(cap_offset + SRIOV_TOTAL_VFS_OFFSET, 2)? as u16; + let first_vf_offset = self.backend.read(cap_offset + SRIOV_FIRST_VF_OFFSET, 2)? as u16; + let vf_stride = self.backend.read(cap_offset + SRIOV_VF_STRIDE_OFFSET, 2)? as u16; + let vf_device_id = self + .backend + .read(cap_offset + SRIOV_VF_DEVICE_ID_OFFSET, 2)? 
as u16; + + if total_vfs == 0 || first_vf_offset == 0 || vf_stride == 0 { + return Ok(()); + } + + let vf_bars = self.parse_sriov_vf_bars(cap_offset)?; + let mut vf_bdfs = Vec::with_capacity(total_vfs as usize); + for vf_index in 0..total_vfs { + let route_offset = match first_vf_offset.checked_add(vf_stride.saturating_mul(vf_index)) + { + Some(offset) => offset, + None => break, + }; + if let Some(vf_bdf) = self.bdf.add_routing_id_offset(route_offset) { + vf_bdfs.push(vf_bdf); + } else { + break; + } + } + + if vf_bdfs.is_empty() { + return Ok(()); + } + + self.with_access_mut(|access| { + access.set_bits(cap_offset as usize..(cap_offset as usize + 0x40)); + }); + + self.backend + .write(cap_offset + SRIOV_NUM_VFS_OFFSET, 2, total_vfs as usize)?; + let ctrl = self.backend.read(cap_offset + SRIOV_CTRL_OFFSET, 2)? as u16; + self.backend.write( + cap_offset + SRIOV_CTRL_OFFSET, + 2, + (ctrl | SRIOV_CTRL_VF_ENABLE) as usize, + )?; + + let mut sriov_info = SriovInfo::new( + cap_offset, + initial_vfs, + total_vfs, + first_vf_offset, + vf_stride, + vf_device_id, + vf_bars, + vf_bdfs, + ); + sriov_info.enabled_vfs = total_vfs; + self.sriov_info = Some(sriov_info); + + Ok(()) + } + + pub fn create_sriov_vf( + &self, + vf_bdf: Bdf, + vf_index: u16, + backend_base: PciConfigAddress, + pci_addr_base: PciConfigAddress, + ) -> Option { + let sriov_info = self.sriov_info.as_ref()?; + let mut vf = Self::endpoint( + vf_bdf, + pci_addr_base, + Arc::new(EndpointHeader::new_with_region(PciConfigMmio::new( + backend_base, + CONFIG_LENTH, + ))), + sriov_info.vf_bars.clone(), + PciMem::default(), + self.config_value.get_class_and_revision_id(), + (sriov_info.vf_device_id, self.config_value.get_id().1), + ); + + vf.set_host_bdf(vf_bdf); + vf.set_parent_bdf(self.bdf); + vf.set_parent_bus(self.bdf.bus()); + vf.set_sriov_vf_info(Some(SriovVfInfo { + pf_bdf: self.bdf, + vf_index, + })); + vf.config_value_init(); + vf.capability_enumerate(); + vf.ext_capability_enumerate(); + 
vf.build_msi_info(); + + Some(vf) + } + /* false: some bits ro */ pub fn writable(&self, offset: PciConfigAddress, size: usize) -> bool { self.control.bits[offset as usize..offset as usize + size] @@ -765,8 +1169,19 @@ impl Debug for VirtualPciConfigSpace { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, - "\n bdf {:#?}\n base {:#x}\n type {:#?}\n {:#?}\n {:#?}\n {:#?}", - self.bdf, self.base, self.config_type, self.bararr, self.rom, self.capabilities + "\n bdf {:#?}\n parent_bdf {:#?}\n base {:#x}\n type {:#?}\n msi_info {:#x?}\n sriov_info {:#x?}\n sriov_vf_info {:#x?}\n hide_sriov {:#x?}\n {:#?}\n {:#?}\n {:#?}\n {:#?}", + self.bdf, + self.parent_bdf, + self.base, + self.config_type, + self.msi_info, + self.sriov_info, + self.sriov_vf_info, + self.hide_sriov, + self.bararr, + self.rom, + self.capabilities, + self.ext_capabilities ) } } @@ -783,6 +1198,7 @@ impl VirtualPciConfigSpace { Self { host_bdf: Bdf::default(), parent_bdf: Bdf::default(), + parent_bus: 0, bdf, vbdf: bdf, config_type: HeaderType::Endpoint, @@ -797,7 +1213,12 @@ impl VirtualPciConfigSpace { bararr, rom: PciMem::default(), capabilities: PciCapabilityList::new(), + ext_capabilities: PciExtCapabilityList::new(), dev_type, + msi_info: None, + sriov_info: None, + sriov_vf_info: None, + hide_sriov: None, msix_table, } } @@ -814,6 +1235,7 @@ impl VirtualPciConfigSpace { Self { host_bdf: Bdf::default(), parent_bdf: Bdf::default(), + parent_bus: 0, bdf, vbdf: Bdf::default(), config_type: HeaderType::Endpoint, @@ -825,7 +1247,12 @@ impl VirtualPciConfigSpace { bararr, rom, capabilities: PciCapabilityList::new(), + ext_capabilities: PciExtCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, + sriov_info: None, + sriov_vf_info: None, + hide_sriov: None, msix_table: None, } } @@ -842,6 +1269,7 @@ impl VirtualPciConfigSpace { Self { host_bdf: Bdf::default(), parent_bdf: Bdf::default(), + parent_bus: 0, bdf, vbdf: Bdf::default(), config_type: 
HeaderType::PciBridge, @@ -853,7 +1281,12 @@ impl VirtualPciConfigSpace { bararr, rom, capabilities: PciCapabilityList::new(), + ext_capabilities: PciExtCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, + sriov_info: None, + sriov_vf_info: None, + hide_sriov: None, msix_table: None, } } @@ -871,6 +1304,7 @@ impl VirtualPciConfigSpace { Self { host_bdf: Bdf::default(), parent_bdf: Bdf::default(), + parent_bus: 0, bdf, vbdf: Bdf::default(), config_type: HeaderType::Endpoint, @@ -883,7 +1317,12 @@ impl VirtualPciConfigSpace { bararr: Bar::default(), rom: PciMem::default(), capabilities: PciCapabilityList::new(), + ext_capabilities: PciExtCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, + sriov_info: None, + sriov_vf_info: None, + hide_sriov: None, msix_table: None, } } @@ -897,6 +1336,7 @@ impl VirtualPciConfigSpace { Self { host_bdf: bdf, parent_bdf: bdf, + parent_bus: bdf.bus(), bdf: bdf, vbdf: bdf, config_type: HeaderType::Endpoint, @@ -908,7 +1348,12 @@ impl VirtualPciConfigSpace { bararr: Bar::default(), rom: PciMem::default(), capabilities: PciCapabilityList::new(), + ext_capabilities: PciExtCapabilityList::new(), dev_type: VpciDevType::Physical, + msi_info: None, + sriov_info: None, + sriov_vf_info: None, + hide_sriov: None, msix_table: None, } } @@ -925,6 +1370,10 @@ impl VirtualPciConfigSpace { self.parent_bdf = parent_bdf; } + pub fn set_parent_bus(&mut self, parent_bus: u8) { + self.parent_bus = parent_bus; + } + pub fn get_bdf(&self) -> Bdf { self.bdf } @@ -933,6 +1382,14 @@ impl VirtualPciConfigSpace { self.vbdf } + pub fn get_parent_bdf(&self) -> Bdf { + self.parent_bdf + } + + pub fn get_parent_bus(&self) -> u8 { + self.parent_bus + } + pub fn get_config_type(&self) -> HeaderType { self.config_type } @@ -945,6 +1402,87 @@ impl VirtualPciConfigSpace { self.base } + pub fn get_msi_count(&self) -> u32 { + self.msi_info + .as_ref() + .map(|info| info.msi_count) + .unwrap_or(0) + } + + /// Build MSI/MSIX info 
structure based on device capabilities + pub fn build_msi_info(&mut self) { + let mut msi_count = 0u32; + let mut msix_count = 0u32; + let mut msix_bar_id = 0u8; + let mut msix_offset = 0u64; + let mut has_msix = false; + + // Check if the device has MSI or MSIX capability and calculate both + for (_offset, cap) in self.capabilities.cap_in_config_ref().iter() { + match cap.get_type() { + CapabilityType::Msi => { + // For MSI: read offset+2, Message Control bits 3:1 contain MMC + // Supported messages = 2^MMC + if let Ok(val) = cap.with_region(|region| region.read(0x02, 2)) { + let mmc = (val & 0x0E) >> 1; // bits 3:1 + msi_count = 1u32 << mmc; + } + } + CapabilityType::MsiX => { + // For MSIX: read offset+2, bits 10-0 contain table size + // Supported messages = table_size + 1 + if let Ok(val) = cap.with_region(|region| region.read(0x02, 2)) { + let table_size = (val & 0x07FF) as u32; // bits 10-0 + msix_count = table_size + 1; + } + + // Extract MSIX table location (offset+4) + // Bits 2-0: BAR ID (0-5), Bits 31-3: table offset + if let Ok(table_info) = cap.with_region(|region| region.read(0x04, 4)) { + msix_bar_id = (table_info & 0x07) as u8; + msix_offset = ((table_info >> 3) as u64) << 3; // multiply by 8 since offset is in 8-byte increments + has_msix = true; + } + } + _ => {} + } + } + + // Create MsiInfo if device has MSI or MSIX capability + let interrupt_count = core::cmp::max(msi_count, msix_count); + if interrupt_count > 0 { + let mut msi_info = MsiInfo::new(interrupt_count); + + if has_msix { + // Read the BAR's physical address + let bar_paddr = self.bararr[msix_bar_id as usize].get_value64() & !0xf; + msi_info.set_msix_info(msix_bar_id, msix_offset, msix_count, bar_paddr); + } + + self.msi_info = Some(msi_info); + } + } + + pub fn get_msi_info(&self) -> Option<&MsiInfo> { + self.msi_info.as_ref() + } + + pub fn get_sriov_info(&self) -> Option<&SriovInfo> { + self.sriov_info.as_ref() + } + + pub fn get_sriov_vf_info(&self) -> Option { + 
self.sriov_vf_info + } + + pub fn set_sriov_info(&mut self, sriov_info: Option) { + self.sriov_info = sriov_info; + } + + pub fn set_sriov_vf_info(&mut self, sriov_vf_info: Option) { + self.sriov_vf_info = sriov_vf_info; + } + /* now the space_init just with bar * Note: space field removed, bar values are cached in config_value.bar_value */ @@ -1172,7 +1710,7 @@ impl PciIterator { } let mut ep = EndpointHeader::new_with_region(region); - let rom = Self::rom_init(&mut ep); + let rom = Self::rom_init(&mut self.allocator, &mut ep); let bararr = Self::bar_mem_init(ep.bar_limit().into(), &mut self.allocator, &mut ep); @@ -1191,14 +1729,18 @@ impl PciIterator { ); let _ = node.capability_enumerate(); + node.ext_capability_enumerate(); + #[cfg(feature = "sriov")] + let _ = node.build_sriov_info(); + // Build MSI/MSIX info once during device discovery + node.build_msi_info(); Some(node) } HeaderType::PciBridge => { // For bridge: don't push host_bridge, it will be handled in Iterator::next() - warn!("bridge"); let mut bridge = PciBridgeHeader::new_with_region(region); - let rom = Self::rom_init(&mut bridge); + let rom = Self::rom_init(&mut self.allocator, &mut bridge); let bararr = Self::bar_mem_init(bridge.bar_limit().into(), &mut self.allocator, &mut bridge); @@ -1215,6 +1757,11 @@ impl PciIterator { ); let _ = node.capability_enumerate(); + node.ext_capability_enumerate(); + #[cfg(feature = "sriov")] + let _ = node.build_sriov_info(); + // Build MSI/MSIX info once during device discovery + node.build_msi_info(); Some(node) } @@ -1231,11 +1778,28 @@ impl PciIterator { } } - fn rom_init(dev: &mut D) -> PciMem { + fn rom_init( + allocator: &mut Option, + dev: &mut D, + ) -> PciMem { let mut rom = dev.parse_rom(); if rom.get_type() == PciMemType::Rom { - rom.set_value(rom.get_value() as u64); - rom.set_virtual_value(rom.get_value() as u64); + if let Some(a) = allocator { + let value = a.alloc_memory32(rom.get_size() as u64).unwrap(); + rom.set_value(value); + 
rom.set_virtual_value(value); + // Do not enable ROM yet, write 0 (ROM disabled) + // VM will enable it later by writing address + enable bit + // info!( + // "allocated rom address: {:#x}, write 0 (disabled) to hardware", + // value + // ); + let _ = dev.write(dev.rom_offset(), 4, 0 as _); + } else { + let value = rom.get_value() as u64; + rom.set_value(value); + rom.set_virtual_value(value); + } } rom } @@ -1247,7 +1811,7 @@ impl PciIterator { ) -> Bar { let mut bararr = dev.parse_bar(); - info!("{:#?}", bararr); + // info!("{:#?}", bararr); if let Some(a) = allocator { dev.update_command(|mut cmd| { @@ -1328,7 +1892,7 @@ impl PciIterator { fn next_device_not_ok(&mut self) -> bool { if let Some(parent) = self.stack.last_mut() { // only one child and skip this bus - if parent.has_secondary_link { + if parent.has_only_one_child { parent.device = MAX_DEVICE; } @@ -1413,9 +1977,10 @@ impl Iterator for PciIterator { let parent = self.stack.last().unwrap(); // Safe because we just ensured it exists let host_bdf = Bdf::new(domain, bus_begin, 0, 0); let parent_bdf = Bdf::new(domain, parent.bus, parent.device, 0); - let parent_bus = parent.primary_bus; + let _parent_bus = parent.primary_bus; node.set_host_bdf(host_bdf); node.set_parent_bdf(parent_bdf); + node.set_parent_bus(_parent_bus); self.next(match node.config_value.get_class().0 { // class code 0x6 is bridge and class.1 0x0 is host bridge 0x6 if node.config_value.get_class().1 == 0x4 => { @@ -1471,7 +2036,7 @@ impl Iterator for PciIterator { let immediate_parent_bus = parent.bus; Some(self.get_bridge().next_bridge( self.address(immediate_parent_bus, bdf), - node.has_secondary_link(), + node.has_only_one_child(), self.is_mulitple_function, self.function, next_bus, @@ -1497,7 +2062,7 @@ pub struct Bridge { secondary_bus: u8, primary_bus: u8, mmio: PciConfigMmio, - has_secondary_link: bool, + has_only_one_child: bool, is_mulitple_function: bool, } @@ -1513,7 +2078,7 @@ impl Bridge { secondary_bus: 0, primary_bus: 0, 
mmio: PciConfigMmio::new(0, 0), // Dummy mmio for placeholder - has_secondary_link: false, + has_only_one_child: false, is_mulitple_function: false, } } @@ -1532,7 +2097,7 @@ impl Bridge { secondary_bus: bus_begin, primary_bus: bus_begin, mmio: PciConfigMmio::new(address, CONFIG_LENTH), - has_secondary_link: false, + has_only_one_child: false, is_mulitple_function, } } @@ -1540,7 +2105,7 @@ impl Bridge { pub fn next_bridge( &self, address: PciConfigAddress, - has_secondary_link: bool, + has_only_one_child: bool, is_mulitple_function: bool, function: u8, target_bus: u8, @@ -1554,7 +2119,7 @@ impl Bridge { secondary_bus: target_bus, primary_bus: self.bus, mmio, - has_secondary_link, + has_only_one_child, is_mulitple_function, } } @@ -1583,8 +2148,8 @@ impl Bridge { } } - pub fn set_has_secondary_link(&mut self, value: bool) { - self.has_secondary_link = value; + pub fn set_has_only_one_child(&mut self, value: bool) { + self.has_only_one_child = value; } } @@ -1624,12 +2189,77 @@ impl RootComplex { ) -> PciIterator { self.__enumerate(range, domain, bar_alloc) } + + pub fn create_sriov_vfs(&self, pf: &VirtualPciConfigSpace) -> Vec { + let Some(sriov_info) = pf.get_sriov_info() else { + return Vec::new(); + }; + + sriov_info + .vf_bdfs + .iter() + .copied() + .enumerate() + .filter_map(|(vf_index, vf_bdf)| { + let backend_base = self + .accessor + .get_physical_address(vf_bdf, 0, pf.get_bdf().bus()) + .unwrap_or(0); + let pci_addr_base = self.accessor.get_pci_addr_base(vf_bdf).unwrap_or(0); + pf.create_sriov_vf(vf_bdf, vf_index as u16, backend_base, pci_addr_base) + }) + .collect() + } +} + +#[derive(Debug)] +/// MSI information for a specific domain in a VM +/// Tracks the MSI interrupts needed for this domain and the hardware base interrupt bit +pub struct DomainMsiInfo { + /// Total number of MSI interrupts needed for all devices in this domain + pub msi_count: u32, + /// Hardware MSI base bit index (allocated from domain allocator) + pub hwirq_bit: u32, + /// 
Virtual doorbell address set by the VM (PCIE_MSI_ADDR_LO + PCIE_MSI_ADDR_HI) + pub vm_doorbell_addr: u64, +} + +impl DomainMsiInfo { + pub fn new(msi_count: u32, hwirq_bit: u32) -> Self { + Self { + msi_count, + hwirq_bit, + vm_doorbell_addr: 0, + } + } + + /// Set the virtual doorbell address (from VM) + pub fn set_vm_doorbell(&mut self, addr: u64) { + self.vm_doorbell_addr = addr; + } + + /// Get the virtual doorbell address + pub fn get_vm_doorbell(&self) -> u64 { + self.vm_doorbell_addr + } + + /// Get MSI mask based on msi_count + /// Returns a mask with msi_count bits set (0-based, e.g. msi_count=4 -> mask=0xf) + pub fn get_msi_mask(&self) -> u32 { + if self.msi_count >= 32 { + 0xffffffff + } else { + (1u32 << self.msi_count) - 1 + } + } } #[derive(Debug)] pub struct VirtualRootComplex { devs: BTreeMap, base_to_bdf: BTreeMap, + // MSI interrupt information per domain (domain_id -> DomainMsiInfo) + domain_msi_info: BTreeMap, accessor: Option>, msix_backend: Option>>, } @@ -1639,6 +2269,7 @@ impl VirtualRootComplex { Self { devs: BTreeMap::new(), base_to_bdf: BTreeMap::new(), + domain_msi_info: BTreeMap::new(), accessor: None, msix_backend: None, } @@ -1648,15 +2279,23 @@ impl VirtualRootComplex { self.accessor = Some(accessor); } + pub fn accessor(&self) -> Option<&Arc> { + self.accessor.as_ref() + } + pub fn insert( &mut self, bdf: Bdf, dev: VirtualPciConfigSpace, ) -> Option { - let parent_bus = dev.parent_bdf.bus(); - let offset = 0; + let _parent_bus = dev.parent_bdf.bus(); + let _offset = 0; let base = if let Some(accessor) = &self.accessor { - match accessor.get_physical_address(bdf, offset, parent_bus) { + #[cfg(feature = "dwc_pcie")] + let addr = accessor.get_pci_addr_base(bdf); + #[cfg(not(feature = "dwc_pcie"))] + let addr = accessor.get_physical_address(bdf, _offset, _parent_bus); + match addr { Ok(addr) => addr, Err(_) => { warn!("can not get physical address for device {:#?}(vbdf), reset device base same to hardware", bdf); @@ -1671,12 +2310,69 @@ 
impl VirtualRootComplex { self.base_to_bdf.insert(base, bdf); self.devs .insert(bdf, ArcRwLockVirtualPciConfigSpace::new(dev)) + + // let base = dev.get_base(); + // let host_bdf = dev.get_bdf(); + // let vbdf = dev.get_vbdf(); + + // #[cfg(feature = "dwc_pcie")] + // let key = { + // let bus = bdf.bus() as PciConfigAddress; + // let device = bdf.device() as PciConfigAddress; + // let function = bdf.function() as PciConfigAddress; + // let pci_addr = (bus << 24) + (device << 19) + (function << 16); + // if bus != 0 { + // pci_addr + // } else { + // base + // } + // }; + + // #[cfg(not(feature = "dwc_pcie"))] + // let key = base; + + // #[cfg(feature = "dwc_pcie")] + // { + // let bus = bdf.bus() as PciConfigAddress; + // let device = bdf.device() as PciConfigAddress; + // let function = bdf.function() as PciConfigAddress; + // let pci_addr = (bus << 24) + (device << 19) + (function << 16); + // info!( + // "vpci insert: base_to_bdf[{:#x}] = key_bdf {:#?}, source {}, base {:#x}, pci_addr {:#x}, dev_host_bdf {:#?}, dev_vbdf {:#?}, remapped {}", + // key, + // bdf, + // if key == pci_addr { "pci_addr" } else { "base" }, + // base, + // pci_addr, + // host_bdf, + // vbdf, + // host_bdf != vbdf + // ); + // } + + // #[cfg(not(feature = "dwc_pcie"))] + // info!( + // "vpci insert: base_to_bdf[{:#x}] = key_bdf {:#?}, source base, base {:#x}, dev_host_bdf {:#?}, dev_vbdf {:#?}, remapped {}", + // key, + // bdf, + // base, + // host_bdf, + // vbdf, + // host_bdf != vbdf + // ); + // self.base_to_bdf.insert(key, bdf); + // self.devs + // .insert(bdf, ArcRwLockVirtualPciConfigSpace::new(dev)) } pub fn devs(&mut self) -> &mut BTreeMap { &mut self.devs } + pub fn devs_ref(&self) -> &BTreeMap { + &self.devs + } + pub fn read_devs(&self) -> &BTreeMap { &self.devs } @@ -1698,6 +2394,34 @@ impl VirtualRootComplex { self.devs.get(&bdf).cloned() } + /// Add MSI count for a specific domain with allocated hardware interrupt bit + pub fn add_msi_count_for_domain(&mut self, domain: u8, 
msi_count: u32, hwirq_bit: u32) { + let vm_doorbell = self + .domain_msi_info + .get(&domain) + .map(|info| info.get_vm_doorbell()) + .unwrap_or(0); + + let mut info = DomainMsiInfo::new(msi_count, hwirq_bit); + info.set_vm_doorbell(vm_doorbell); + self.domain_msi_info.insert(domain, info); + } + + /// Get MSI info for a specific domain + pub fn get_domain_msi_info(&self, domain: u8) -> Option<&DomainMsiInfo> { + self.domain_msi_info.get(&domain) + } + + /// Get reference to domain MSI info map + pub fn domain_msi_info(&self) -> &BTreeMap { + &self.domain_msi_info + } + + /// Get mutable reference to domain MSI info map + pub fn domain_msi_info_mut(&mut self) -> &mut BTreeMap { + &mut self.domain_msi_info + } + pub fn get_msix_backend(&self) -> Option>> { self.msix_backend.clone() } @@ -1871,6 +2595,243 @@ impl CapabilityType { } } +// ---- PCIe Extended Capabilities (config space 0x100–0xFFF) ---- + +/// PCIe Extended Capability IDs (PCI-SIG ECN). +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ExtCapabilityType { + /// Advanced Error Reporting, ID = 0x0001 + AdvancedErrorReporting, + /// Virtual Channel, ID = 0x0002 + VirtualChannel, + /// Device Serial Number, ID = 0x0003 + DeviceSerialNumber, + /// Power Budgeting, ID = 0x0004 + PowerBudgeting, + /// Root Complex Link Declaration, ID = 0x0005 + RootComplexLinkDeclaration, + /// Root Complex Internal Link Control, ID = 0x0006 + RootComplexInternalLinkControl, + /// Root Complex Event Collector Endpoint Association, ID = 0x0007 + RootComplexEventCollector, + /// Multi-Function Virtual Channel, ID = 0x0008 + MultiFunctionVirtualChannel, + /// VC in Multi-Function Device, ID = 0x0009 + VirtualChannelMFVC, + /// Root Complex Register Block, ID = 0x000A + RootComplexRegisterBlock, + /// Vendor-Specific Extended Capability, ID = 0x000B + VendorSpecific, + /// Configuration Access Correlation, ID = 0x000C + ConfigurationAccessCorrelation, + /// Access Control Services, ID = 0x000D + AccessControlServices, + /// 
Alternative Routing-ID Interpretation, ID = 0x000E + AlternativeRoutingId, + /// Address Translation Services, ID = 0x000F + AddressTranslationServices, + /// Single Root I/O Virtualization (SR-IOV), ID = 0x0010 + SingleRootIov, + /// Multi-Root I/O Virtualization (MR-IOV), ID = 0x0011 + MultiRootIov, + /// Multicast, ID = 0x0012 + Multicast, + /// Page Request Interface, ID = 0x0013 + PageRequestInterface, + /// Resizable BAR, ID = 0x0015 + ResizableBar, + /// Dynamic Power Allocation, ID = 0x0016 + DynamicPowerAllocation, + /// TPH Requester, ID = 0x0017 + TphRequester, + /// Latency Tolerance Reporting, ID = 0x0018 + LatencyToleranceReporting, + /// Secondary PCI Express, ID = 0x0019 + SecondaryPciExpress, + /// Protocol Multiplexing, ID = 0x001A + ProtocolMultiplexing, + /// Process Address Space ID (PASID), ID = 0x001B + ProcessAddressSpaceId, + /// LN Requester, ID = 0x001C + LnRequester, + /// Downstream Port Containment, ID = 0x001D + DownstreamPortContainment, + /// L1 PM Substates, ID = 0x001E + L1PmSubstates, + /// Precision Time Measurement, ID = 0x001F + PrecisionTimeMeasurement, + /// Designated Vendor-Specific, ID = 0x0023 + DesignatedVendorSpecific, + /// VF Resizable BAR, ID = 0x0024 + VfResizableBar, + /// Data Link Feature, ID = 0x0025 + DataLinkFeature, + /// Physical Layer 16.0 GT/s, ID = 0x0026 + PhysicalLayer16Gts, + /// Lane Margining at the Receiver, ID = 0x0027 + LaneMargining, + /// Physical Layer 32.0 GT/s, ID = 0x002A + PhysicalLayer32Gts, + /// Unknown or reserved extended capability + Unknown(u16), +} + +impl ExtCapabilityType { + pub fn from_id(id: u16) -> Self { + match id { + 0x0001 => ExtCapabilityType::AdvancedErrorReporting, + 0x0002 => ExtCapabilityType::VirtualChannel, + 0x0003 => ExtCapabilityType::DeviceSerialNumber, + 0x0004 => ExtCapabilityType::PowerBudgeting, + 0x0005 => ExtCapabilityType::RootComplexLinkDeclaration, + 0x0006 => ExtCapabilityType::RootComplexInternalLinkControl, + 0x0007 => 
ExtCapabilityType::RootComplexEventCollector, + 0x0008 => ExtCapabilityType::MultiFunctionVirtualChannel, + 0x0009 => ExtCapabilityType::VirtualChannelMFVC, + 0x000A => ExtCapabilityType::RootComplexRegisterBlock, + 0x000B => ExtCapabilityType::VendorSpecific, + 0x000C => ExtCapabilityType::ConfigurationAccessCorrelation, + 0x000D => ExtCapabilityType::AccessControlServices, + 0x000E => ExtCapabilityType::AlternativeRoutingId, + 0x000F => ExtCapabilityType::AddressTranslationServices, + 0x0010 => ExtCapabilityType::SingleRootIov, + 0x0011 => ExtCapabilityType::MultiRootIov, + 0x0012 => ExtCapabilityType::Multicast, + 0x0013 => ExtCapabilityType::PageRequestInterface, + 0x0015 => ExtCapabilityType::ResizableBar, + 0x0016 => ExtCapabilityType::DynamicPowerAllocation, + 0x0017 => ExtCapabilityType::TphRequester, + 0x0018 => ExtCapabilityType::LatencyToleranceReporting, + 0x0019 => ExtCapabilityType::SecondaryPciExpress, + 0x001A => ExtCapabilityType::ProtocolMultiplexing, + 0x001B => ExtCapabilityType::ProcessAddressSpaceId, + 0x001C => ExtCapabilityType::LnRequester, + 0x001D => ExtCapabilityType::DownstreamPortContainment, + 0x001E => ExtCapabilityType::L1PmSubstates, + 0x001F => ExtCapabilityType::PrecisionTimeMeasurement, + 0x0023 => ExtCapabilityType::DesignatedVendorSpecific, + 0x0024 => ExtCapabilityType::VfResizableBar, + 0x0025 => ExtCapabilityType::DataLinkFeature, + 0x0026 => ExtCapabilityType::PhysicalLayer16Gts, + 0x0027 => ExtCapabilityType::LaneMargining, + 0x002A => ExtCapabilityType::PhysicalLayer32Gts, + other => ExtCapabilityType::Unknown(other), + } + } + + pub fn to_id(&self) -> u16 { + match self { + ExtCapabilityType::AdvancedErrorReporting => 0x0001, + ExtCapabilityType::VirtualChannel => 0x0002, + ExtCapabilityType::DeviceSerialNumber => 0x0003, + ExtCapabilityType::PowerBudgeting => 0x0004, + ExtCapabilityType::RootComplexLinkDeclaration => 0x0005, + ExtCapabilityType::RootComplexInternalLinkControl => 0x0006, + 
ExtCapabilityType::RootComplexEventCollector => 0x0007, + ExtCapabilityType::MultiFunctionVirtualChannel => 0x0008, + ExtCapabilityType::VirtualChannelMFVC => 0x0009, + ExtCapabilityType::RootComplexRegisterBlock => 0x000A, + ExtCapabilityType::VendorSpecific => 0x000B, + ExtCapabilityType::ConfigurationAccessCorrelation => 0x000C, + ExtCapabilityType::AccessControlServices => 0x000D, + ExtCapabilityType::AlternativeRoutingId => 0x000E, + ExtCapabilityType::AddressTranslationServices => 0x000F, + ExtCapabilityType::SingleRootIov => 0x0010, + ExtCapabilityType::MultiRootIov => 0x0011, + ExtCapabilityType::Multicast => 0x0012, + ExtCapabilityType::PageRequestInterface => 0x0013, + ExtCapabilityType::ResizableBar => 0x0015, + ExtCapabilityType::DynamicPowerAllocation => 0x0016, + ExtCapabilityType::TphRequester => 0x0017, + ExtCapabilityType::LatencyToleranceReporting => 0x0018, + ExtCapabilityType::SecondaryPciExpress => 0x0019, + ExtCapabilityType::ProtocolMultiplexing => 0x001A, + ExtCapabilityType::ProcessAddressSpaceId => 0x001B, + ExtCapabilityType::LnRequester => 0x001C, + ExtCapabilityType::DownstreamPortContainment => 0x001D, + ExtCapabilityType::L1PmSubstates => 0x001E, + ExtCapabilityType::PrecisionTimeMeasurement => 0x001F, + ExtCapabilityType::DesignatedVendorSpecific => 0x0023, + ExtCapabilityType::VfResizableBar => 0x0024, + ExtCapabilityType::DataLinkFeature => 0x0025, + ExtCapabilityType::PhysicalLayer16Gts => 0x0026, + ExtCapabilityType::LaneMargining => 0x0027, + ExtCapabilityType::PhysicalLayer32Gts => 0x002A, + ExtCapabilityType::Unknown(id) => *id, + } + } +} + +impl core::fmt::Debug for ExtCapabilityType { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + ExtCapabilityType::AdvancedErrorReporting => { + write!(f, "AdvancedErrorReporting(0x0001)") + } + ExtCapabilityType::VirtualChannel => write!(f, "VirtualChannel(0x0002)"), + ExtCapabilityType::DeviceSerialNumber => write!(f, 
"DeviceSerialNumber(0x0003)"), + ExtCapabilityType::PowerBudgeting => write!(f, "PowerBudgeting(0x0004)"), + ExtCapabilityType::RootComplexLinkDeclaration => { + write!(f, "RootComplexLinkDeclaration(0x0005)") + } + ExtCapabilityType::RootComplexInternalLinkControl => { + write!(f, "RootComplexInternalLinkControl(0x0006)") + } + ExtCapabilityType::RootComplexEventCollector => { + write!(f, "RootComplexEventCollector(0x0007)") + } + ExtCapabilityType::MultiFunctionVirtualChannel => { + write!(f, "MultiFunctionVirtualChannel(0x0008)") + } + ExtCapabilityType::VirtualChannelMFVC => write!(f, "VirtualChannelMFVC(0x0009)"), + ExtCapabilityType::RootComplexRegisterBlock => { + write!(f, "RootComplexRegisterBlock(0x000A)") + } + ExtCapabilityType::VendorSpecific => write!(f, "VendorSpecific(0x000B)"), + ExtCapabilityType::ConfigurationAccessCorrelation => { + write!(f, "ConfigurationAccessCorrelation(0x000C)") + } + ExtCapabilityType::AccessControlServices => write!(f, "AccessControlServices(0x000D)"), + ExtCapabilityType::AlternativeRoutingId => write!(f, "AlternativeRoutingId(0x000E)"), + ExtCapabilityType::AddressTranslationServices => { + write!(f, "AddressTranslationServices(0x000F)") + } + ExtCapabilityType::SingleRootIov => write!(f, "SingleRootIov(SR-IOV)(0x0010)"), + ExtCapabilityType::MultiRootIov => write!(f, "MultiRootIov(MR-IOV)(0x0011)"), + ExtCapabilityType::Multicast => write!(f, "Multicast(0x0012)"), + ExtCapabilityType::PageRequestInterface => write!(f, "PageRequestInterface(0x0013)"), + ExtCapabilityType::ResizableBar => write!(f, "ResizableBar(0x0015)"), + ExtCapabilityType::DynamicPowerAllocation => { + write!(f, "DynamicPowerAllocation(0x0016)") + } + ExtCapabilityType::TphRequester => write!(f, "TphRequester(0x0017)"), + ExtCapabilityType::LatencyToleranceReporting => { + write!(f, "LatencyToleranceReporting(0x0018)") + } + ExtCapabilityType::SecondaryPciExpress => write!(f, "SecondaryPciExpress(0x0019)"), + ExtCapabilityType::ProtocolMultiplexing 
=> write!(f, "ProtocolMultiplexing(0x001A)"), + ExtCapabilityType::ProcessAddressSpaceId => write!(f, "ProcessAddressSpaceId(0x001B)"), + ExtCapabilityType::LnRequester => write!(f, "LnRequester(0x001C)"), + ExtCapabilityType::DownstreamPortContainment => { + write!(f, "DownstreamPortContainment(0x001D)") + } + ExtCapabilityType::L1PmSubstates => write!(f, "L1PmSubstates(0x001E)"), + ExtCapabilityType::PrecisionTimeMeasurement => { + write!(f, "PrecisionTimeMeasurement(0x001F)") + } + ExtCapabilityType::DesignatedVendorSpecific => { + write!(f, "DesignatedVendorSpecific(0x0023)") + } + ExtCapabilityType::VfResizableBar => write!(f, "VfResizableBar(0x0024)"), + ExtCapabilityType::DataLinkFeature => write!(f, "DataLinkFeature(0x0025)"), + ExtCapabilityType::PhysicalLayer16Gts => write!(f, "PhysicalLayer16Gts(0x0026)"), + ExtCapabilityType::LaneMargining => write!(f, "LaneMargining(0x0027)"), + ExtCapabilityType::PhysicalLayer32Gts => write!(f, "PhysicalLayer32Gts(0x002A)"), + ExtCapabilityType::Unknown(id) => write!(f, "Unknown({:#06x})", id), + } + } +} + #[derive(Clone)] pub struct PciCapability { cap_type: CapabilityType, @@ -2184,6 +3145,122 @@ impl Debug for PciCapabilityList { } } +// ---- PCIe Extended Capability types ---- + +/// A single PCIe extended capability entry (config space 0x100–0xFFF). +#[derive(Clone, Copy, Debug)] +pub struct PciExtCapability { + pub cap_type: ExtCapabilityType, + /// Absolute offset of this extended capability header in config space. + pub offset: PciConfigAddress, + /// Capability structure version (bits 19:16 of the header DWORD). + pub version: u8, +} + +/// Iterator over PCIe extended capabilities starting at offset 0x100. 
+/// +/// Each header DWORD layout: +/// - bits\[15:0\] Extended Capability ID +/// - bits\[19:16\] Capability Version +/// - bits\[31:20\] Next Capability Offset (0 = end of list) +pub struct ExtCapabilityIterator { + backend: Arc, + offset: PciConfigAddress, +} + +impl ExtCapabilityIterator { + const EXT_CAP_START: PciConfigAddress = 0x100; + const EXT_CAP_END: PciConfigAddress = 0x1000; +} + +impl Iterator for ExtCapabilityIterator { + type Item = PciExtCapability; + + fn next(&mut self) -> Option { + // offset must be DWORD-aligned and leave room for a 4-byte DWORD read + if self.offset < Self::EXT_CAP_START + || self.offset > Self::EXT_CAP_END - 4 + || (self.offset & 3) != 0 + { + return None; + } + + let header = match self.backend.read(self.offset, 4) { + Ok(v) => v as u32, + Err(_) => return None, + }; + + // A null DWORD (0x00000000) or all-ones DWORD (0xFFFFFFFF, config space + // not implemented) both mean there are no (more) extended caps. + if header == 0 || header == 0xFFFFFFFF { + self.offset = Self::EXT_CAP_END; + return None; + } + + let id = (header & 0xFFFF) as u16; + let version = ((header >> 16) & 0xF) as u8; + let next_offset = ((header >> 20) & 0xFFF) as PciConfigAddress; + + let cap = PciExtCapability { + cap_type: ExtCapabilityType::from_id(id), + offset: self.offset, + version, + }; + + // next_offset == 0 means this is the last cap in the list. + // Validate: must be DWORD-aligned, within [EXT_CAP_START, EXT_CAP_END-4]. + // Any value outside this range (including 0xFFF etc.) stops iteration. + self.offset = if next_offset >= Self::EXT_CAP_START + && next_offset <= Self::EXT_CAP_END - 4 + && (next_offset & 3) == 0 + { + next_offset + } else { + Self::EXT_CAP_END // sentinel – stops iteration on the next call + }; + + Some(cap) + } +} + +/// Ordered map of PCIe extended capabilities keyed by config-space offset. 
+#[derive(Clone)] +pub struct PciExtCapabilityList(BTreeMap); + +impl PciExtCapabilityList { + pub fn new() -> Self { + Self(BTreeMap::new()) + } +} + +impl Deref for PciExtCapabilityList { + type Target = BTreeMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for PciExtCapabilityList { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Debug for PciExtCapabilityList { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "PciExtCapabilityList {{\n")?; + for (offset, cap) in &self.0 { + write!( + f, + " 0x{:x} {:?} (v{})\n", + offset, cap.cap_type, cap.version + )?; + } + write!(f, "}}") + } +} + impl VirtualPciConfigSpace { fn _capability_enumerate(&self, backend: Arc) -> CapabilityIterator { CapabilityIterator { @@ -2205,38 +3282,105 @@ impl VirtualPciConfigSpace { .cap_in_config .insert(capability.get_offset(), capability); } - info!("capability {:#?}", capabilities); + // info!("capability {:#?}", capabilities); self.capabilities = capabilities; } - //TODO: check secondary link by read cap - pub fn has_secondary_link(&self) -> bool { + fn _ext_capability_enumerate(&self, backend: Arc) -> ExtCapabilityIterator { + ExtCapabilityIterator { + backend, + offset: ExtCapabilityIterator::EXT_CAP_START, + } + } + + /// Walk the PCIe extended configuration space (0x100–0xFFF) and record all + /// extended capabilities found. No further parsing is performed. + pub fn ext_capability_enumerate(&mut self) { + let mut ext_caps = PciExtCapabilityList::new(); + for cap in self._ext_capability_enumerate(self.backend.clone()) { + ext_caps.insert(cap.offset, cap); + } + // info!("ext_capability {:#?}", ext_caps); + + // When the `sriov` feature is disabled, record info needed to splice + // the SR-IOV cap out of the ext-cap linked list for guest VMs. 
+ #[cfg(not(feature = "sriov"))] + { + use bit_field::BitField; + let sriov_offset = ext_caps + .values() + .find(|c| c.cap_type == ExtCapabilityType::SingleRootIov) + .map(|c| c.offset); + + if let Some(sriov_cap_offset) = sriov_offset { + // Read the SR-IOV cap header DWORD to get its own `next` pointer. + let sriov_cap_next = self + .backend + .read(sriov_cap_offset, 4) + .ok() + .map(|dw| ((dw as u32).get_bits(20..32)) as PciConfigAddress) + .unwrap_or(0); + + // The preceding node is the one with the largest offset that is + // still less than sriov_cap_offset. + let prev_cap_offset = ext_caps + .range(..sriov_cap_offset) + .next_back() + .map(|(off, _)| *off); + + self.hide_sriov = Some(HideSriovInfo { + sriov_cap_offset, + sriov_cap_next, + prev_cap_offset, + }); + + // Mark these ranges as "emulated" so accesses go through + // handle_cap_access rather than the hardware direct path. + // + // SR-IOV cap range: we return 0 to hide it from the guest. + self.access.set_bits( + sriov_cap_offset as usize + ..(sriov_cap_offset as usize + SRIOV_CAP_SIZE as usize), + ); + // First DWORD of the preceding node: we patch the `next` pointer. + if let Some(prev) = prev_cap_offset { + self.access.set_bits(prev as usize..prev as usize + 4); + } + } + } + + self.ext_capabilities = ext_caps; + } + + // detect whether this bridge secondary bus can have only one child device. 
+ pub fn has_only_one_child(&self) -> bool { match self.config_type { HeaderType::PciBridge => { - // Find PciExpress capability - // warn!("has_secondary_link {:#?}", self.capabilities); - // for (_, capability) in &self.capabilities { - // if capability.cap_type == CapabilityType::PciExpress { - // // Read PCIe Capability Register at offset + 0x00 - // // Bits 4:0 contain the Device/Port Type - // let offset = capability.get_offset(); - // if let Ok(cap_reg) = self.backend.read(offset, 2) { - // let type_val = (cap_reg as u16).get_bits(0..5); - // if type_val == PCI_EXP_TYPE_ROOT_PORT || type_val == PCI_EXP_TYPE_PCIE_BRIDGE { - // return true; - // } else if type_val == PCI_EXP_TYPE_UPSTREAM || type_val == PCI_EXP_TYPE_DOWNSTREAM { - // // Parent check is not implemented, set to false for now - // return false; - // } - // } - // break; - // } - // } - // false - // #[cfg(feature = "dwc_pcie")] - // return true; - // #[cfg(not(feature = "dwc_pcie"))] - return false; + // Parse PCIe Device/Port Type from PCI Express Capability Register + // (capability offset + 0x02, bits 7:4). + for capability in self._capability_enumerate(self.backend.clone()) { + if capability.get_type() != CapabilityType::PciExpress { + continue; + } + + let offset = capability.get_offset(); + if let Ok(cap_reg) = self.backend.read(offset + 0x2, 2) { + let port_type = (cap_reg as u16).get_bits(4..8) as u16; + return match port_type { + // Root Port / Downstream Port: secondary bus has a single downstream link. + PCI_EXP_TYPE_ROOT_PORT | PCI_EXP_TYPE_DOWNSTREAM => true, + // Upstream Port / PCIe-to-PCI bridge can have multiple children behind it. + PCI_EXP_TYPE_UPSTREAM | PCI_EXP_TYPE_PCIE_BRIDGE => false, + _ => false, + }; + } + + // Capability exists but cannot be read safely. + return false; + } + + // Non-PCIe bridge (or no PCIe capability): keep full secondary-bus scan. 
+ false } _ => false, } diff --git a/src/zone.rs b/src/zone.rs index 3052a4f6..b01553f0 100644 --- a/src/zone.rs +++ b/src/zone.rs @@ -36,6 +36,13 @@ use crate::memory::{MMIOConfig, MMIOHandler, MMIORegion, MemorySet}; use core::panic; use core::sync::atomic::{AtomicBool, Ordering}; +#[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] +use crate::config::{HvPciConfig, HvPciDevConfig, CONFIG_MAX_PCI_DEV, CONFIG_PCI_BUS_MAXNUM}; +#[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] +use crate::pci::pci_config::GLOBAL_PCIE_LIST; +#[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] +use crate::pci::pci_struct::Bdf; + #[cfg(feature = "dwc_pcie")] #[derive(Debug)] pub struct VirtualAtuConfigs { @@ -66,13 +73,6 @@ impl VirtualAtuConfigs { self.ecam_to_atu.insert(ecam_base, atu) } - pub fn get_or_insert_atu(&mut self, ecam_base: usize, f: F) -> &mut AtuConfig - where - F: FnOnce() -> AtuConfig, - { - self.ecam_to_atu.entry(ecam_base).or_insert_with(f) - } - pub fn get_atu_by_io_base(&self, io_base: PciConfigAddress) -> Option<&AtuConfig> { let ecam = self.io_base_to_ecam.get(&io_base); if let Some(ecam) = ecam { @@ -317,6 +317,419 @@ impl ZoneInner { pub fn atu_configs_mut(&mut self) -> &mut VirtualAtuConfigs { &mut self.atu_configs } + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + pub fn guest_pci_init_delay( + &mut self, + _zone_id: usize, + alloc_pci_devs: &[HvPciDevConfig; CONFIG_MAX_PCI_DEV], + num_pci_devs: u64, + pci_config: &[HvPciConfig], + _num_pci_config: usize, + ) -> HvResult { + let guard = GLOBAL_PCIE_LIST.lock(); + for target_pci_config in pci_config { + // Skip empty config + if target_pci_config.ecam_base == 0 { + continue; + } + + #[allow(unused_variables)] + let ecam_base = target_pci_config.ecam_base; + let target_domain = target_pci_config.domain; + let bus_range_begin = target_pci_config.bus_range_begin as u8; + + // Create accessor for VirtualRootComplex, similar to RootComplex + #[cfg(feature = 
"dwc_pcie")] + { + use crate::pci::config_accessors::dwc::DwcConfigAccessor; + use crate::platform; + use alloc::sync::Arc; + + let atu_config = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|atu_cfg| atu_cfg.ecam_base == ecam_base); + + match atu_config { + Some(cfg) => { + let root_bus = bus_range_begin; + let accessor = Arc::new(DwcConfigAccessor::new(cfg, root_bus)); + self.vpci_bus_mut().set_accessor(accessor); + } + None => { + warn!("No ATU config found for ecam_base 0x{:x}", ecam_base); + continue; + } + } + } + + #[cfg(feature = "loongarch64_pcie")] + { + use crate::pci::config_accessors::loongarch64::LoongArchConfigAccessor; + use alloc::sync::Arc; + + let root_bus = bus_range_begin; + let accessor = Arc::new(LoongArchConfigAccessor::new( + ecam_base, + target_pci_config.ecam_size, + root_bus, + )); + self.vpci_bus_mut().set_accessor(accessor); + } + + #[cfg(feature = "ecam_pcie")] + { + use crate::pci::config_accessors::ecam::EcamConfigAccessor; + use alloc::sync::Arc; + + let accessor = Arc::new(EcamConfigAccessor::new(ecam_base)); + self.vpci_bus_mut().set_accessor(accessor); + } + + let mut filtered_devices: alloc::vec::Vec = alloc::vec::Vec::new(); + for i in 0..num_pci_devs { + let dev_config = alloc_pci_devs[i as usize]; + if dev_config.domain == target_domain { + filtered_devices.push(dev_config); + } + } + + // Skip if no devices for this domain + if filtered_devices.is_empty() { + continue; + } + + filtered_devices.sort_by(|a, b| { + a.bus + .cmp(&b.bus) + .then_with(|| a.device.cmp(&b.device)) + .then_with(|| a.function.cmp(&b.function)) + }); + + let mut domain_msi_count: u32 = 0; + + for dev_config in &filtered_devices { + let bdf = Bdf::new_from_config(*dev_config); + let vbdf = Bdf::new( + bdf.domain(), + dev_config.v_bus, + dev_config.v_device, + dev_config.v_function, + ); + + info!("set bdf {:#?} to vbdf {:#?}", bdf, vbdf); + + #[cfg(any( + all(feature = "iommu", target_arch = "aarch64"), + all(feature = "iommu", target_arch = 
"riscv64"), + target_arch = "x86_64" + ))] + { + let iommu_pt_addr = if self.iommu_pt().is_some() { + self.iommu_pt().unwrap().root_paddr() + } else { + 0 + }; + let device_id = (dev_config.bus as usize) << 8 + | (dev_config.device as usize) << 3 + | dev_config.function as usize; + #[cfg(feature = "share_s2pt")] + crate::device::iommu::iommu_add_device_with_root_pt_addr( + _zone_id, + device_id as _, + self.gpm().root_paddr(), + ); + #[cfg(not(feature = "share_s2pt"))] + crate::device::iommu::iommu_add_device_with_root_pt_addr( + _zone_id, + device_id as _, + iommu_pt_addr, + ); + } + + // Insert device into vpci_bus with calculated vbdf + if let Some(dev) = guard.get(&bdf) { + if bdf.is_host_bridge(dev.read().get_host_bdf().bus()) + || dev.with_config_value(|config_value| -> bool { + config_value.get_class().0 == 0x6 + }) + { + let mut vdev = dev.read().config_space.clone(); + vdev.set_vbdf(vbdf); + let msi_count = vdev.get_msi_count(); + domain_msi_count += msi_count; + self.vpci_bus_mut().insert(vbdf, vdev); + } else { + // Allow allocation if zone_id is None (unassigned), or if zone_id is + // Some(0) and the device is a SRIOV VF (initially assigned to root zone + // during enumeration, can be reassigned to a guest zone). 
+ let is_sriov_vf_from_root = dev.get_zone_id() == Some(0) + && dev.read().get_sriov_vf_info().is_some(); + let is_pf = dev.read().get_sriov_info().is_some(); + if dev.get_zone_id().is_none() || is_sriov_vf_from_root { + if is_pf && _zone_id != 0 { + warn!( + "The SR-IOV PF {:#x?} can only be assigned to the root VM", + bdf + ); + } else { + dev.set_zone_id(Some(_zone_id as u32)); + let mut vdev_inner = dev.read().config_space.clone(); + vdev_inner.set_vbdf(vbdf); + let msi_count = vdev_inner.get_msi_count(); + domain_msi_count += msi_count; + self.vpci_bus_mut().insert(vbdf, vdev_inner); + } + } else { + warn!( + "Device {:#?} is already allocated to zone {:?}", + bdf, + dev.get_zone_id() + ); + } + } + } else { + warn!("can not find dev {:#?} in GLOBAL_PCIE_LIST (not detected during enumeration)", bdf); + #[cfg(feature = "ecam_pcie")] + { + use crate::pci::pci_struct::VirtualPciConfigSpace; + use crate::pci::vpci_dev::{get_handler, VpciDevType}; + + let dev_type = dev_config.dev_type; + match dev_type { + VpciDevType::Physical => { + warn!("can not find dev {:#?}", bdf); + } + _ => { + if let Some(_handler) = get_handler(dev_type) { + let base = ecam_base + + ((bdf.bus() as u64) << 20) + + ((bdf.device() as u64) << 15) + + ((bdf.function() as u64) << 12); + let dev = VirtualPciConfigSpace::virt_dev(bdf, base, dev_type); + self.vpci_bus_mut().insert(vbdf, dev); + } else { + warn!("can not find dev {:#?}, unknown device type", bdf); + } + } + } + } + } + } + + // After processing all devices for this domain, allocate hardware MSI bits + if domain_msi_count > 0 { + #[cfg(all(feature = "dwc_msi", feature = "dwc_pcie"))] + { + // Get the DW MSI domain allocator and allocate hwbit + if let Some(mut domain_lock) = + crate::pci::dwc_msi::get_dwc_msi_domain_mut(target_domain) + { + if let Some(domain_msi) = domain_lock.get_mut(&target_domain) { + let zone_cpu_set = self.cpu_set(); + let target_cpu = zone_cpu_set.first_cpu().unwrap_or(0); + match 
domain_msi.allocate_for_cpu(target_cpu, domain_msi_count) { + Ok(hwirq_bit) => { + // Register the MSI info for this domain + self.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + hwirq_bit, + ); + } + Err(e) => { + warn!( + "Failed to allocate MSI for domain {}: {:?}", + target_domain, e + ); + } + } + } + } + } + + #[cfg(not(feature = "dwc_msi"))] + { + // Without dwc_msi feature, just register without hardware bit allocation + self.vpci_bus_mut().add_msi_count_for_domain( + target_domain, + domain_msi_count, + 0, // hwirq_bit is 0 when not using dwc_msi + ); + } + } + } + info!("vpci bus init done\n {:#x?}", self.vpci_bus()); + Ok(()) + } + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + pub fn virtual_pci_dbi_pref_init( + &mut self, + pci_rootcomplex_config: &[HvPciConfig; CONFIG_PCI_BUS_MAXNUM], + _num_pci_config: usize, + ) { + use crate::pci::pci_handler::mmio_vpci_handler_dbi; + + for rootcomplex_config in pci_rootcomplex_config { + if rootcomplex_config.ecam_base == 0 { + continue; + } + + let encoded_arg = + rootcomplex_config.ecam_base as usize + (rootcomplex_config.domain as usize); + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_handler_dbi, + encoded_arg, + ); + } + } + + #[cfg(all(feature = "pci_init_delay", feature = "dwc_pcie"))] + pub fn virtual_pci_mmio_init_delay( + &mut self, + pci_rootcomplex_config: &[HvPciConfig; CONFIG_PCI_BUS_MAXNUM], + _num_pci_config: usize, + ) { + #[cfg(feature = "loongarch64_pcie")] + let mut emergency_map_regions: alloc::vec::Vec<(usize, usize)> = alloc::vec::Vec::new(); + + for rootcomplex_config in pci_rootcomplex_config { + if rootcomplex_config.ecam_base == 0 { + continue; + } + #[cfg(feature = "ecam_pcie")] + { + use crate::pci::pci_handler::mmio_vpci_handler; + + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + 
mmio_vpci_handler, + rootcomplex_config.ecam_base as usize, + ); + } + #[cfg(feature = "dwc_pcie")] + { + use crate::memory::mmio_generic_handler; + use crate::pci::config_accessors::dwc_atu::AtuConfig; + use crate::pci::config_accessors::{dwc::DwcConfigRegionBackend, PciRegionMmio}; + use crate::pci::pci_handler::{ + mmio_dwc_cfg_handler, mmio_dwc_io_handler, mmio_vpci_handler_dbi, + }; + use crate::platform; + + let encoded_arg = + rootcomplex_config.ecam_base as usize + (rootcomplex_config.domain as usize); + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_handler_dbi, + encoded_arg, + ); + + let extend_config = platform::ROOT_DWC_ATU_CONFIG + .iter() + .find(|extend_cfg| extend_cfg.ecam_base == rootcomplex_config.ecam_base); + + if let Some(extend_config) = extend_config { + if extend_config.apb_base != 0 && extend_config.apb_size != 0 { + self.mmio_region_register( + extend_config.apb_base as usize, + extend_config.apb_size as usize, + mmio_generic_handler, + extend_config.apb_base as usize, + ); + } + + let cfg_size_half = extend_config.cfg_size / 2; + let cfg0_base = extend_config.cfg_base; + if cfg0_base != 0 && cfg_size_half != 0 { + self.mmio_region_register( + cfg0_base as usize, + cfg_size_half as usize, + mmio_dwc_cfg_handler, + cfg0_base as usize, + ); + } + + let cfg1_base = extend_config.cfg_base + cfg_size_half; + if cfg1_base != 0 && cfg_size_half != 0 { + self.mmio_region_register( + cfg1_base as usize, + cfg_size_half as usize, + mmio_dwc_cfg_handler, + cfg1_base as usize, + ); + } + + if extend_config.io_cfg_atu_shared != 0 { + self.mmio_region_register( + rootcomplex_config.io_base as usize, + rootcomplex_config.io_size as usize, + mmio_dwc_io_handler, + rootcomplex_config.io_base as usize, + ); + } + + let mut atu = AtuConfig::default(); + + let dbi_base = extend_config.dbi_base as crate::pci::PciConfigAddress; + let dbi_size = extend_config.dbi_size; + let dbi_region = 
PciRegionMmio::new(dbi_base, dbi_size); + let dbi_backend = DwcConfigRegionBackend::new(dbi_region); + if let Err(e) = atu.init_limit_hw_value(&dbi_backend) { + warn!("Failed to initialize ATU0 limit defaults: {:?}", e); + } + + self.atu_configs_mut() + .insert_atu(rootcomplex_config.ecam_base as usize, atu); + self.atu_configs_mut().insert_cfg_base_mapping( + extend_config.cfg_base as crate::pci::PciConfigAddress, + rootcomplex_config.ecam_base as usize, + ); + self.atu_configs_mut().insert_cfg_base_mapping( + cfg1_base as crate::pci::PciConfigAddress, + rootcomplex_config.ecam_base as usize, + ); + self.atu_configs_mut().insert_io_base_mapping( + rootcomplex_config.io_base as crate::pci::PciConfigAddress, + rootcomplex_config.ecam_base as usize, + ); + } + } + #[cfg(feature = "loongarch64_pcie")] + { + use crate::pci::pci_handler::mmio_vpci_direct_handler; + + self.mmio_region_register( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + mmio_vpci_direct_handler, + rootcomplex_config.ecam_base as usize, + ); + emergency_map_regions.push(( + rootcomplex_config.ecam_base as usize, + rootcomplex_config.ecam_size as usize, + )); + } + #[cfg(not(any( + feature = "ecam_pcie", + feature = "dwc_pcie", + feature = "loongarch64_pcie" + )))] + { + warn!( + "No extend config found for base 0x{:x}", + rootcomplex_config.ecam_base + ); + } + } + + // Note: emergency_map_regions requires access to self (for Zone), so this must be handled at Zone level + } } static ZONE_LIST: RwLock>> = RwLock::new(vec![]); @@ -388,16 +801,57 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult> { zone.pt_init(config.memory_regions())?; zone.mmio_init(&config.arch_config); + let mut cpu_num = 0; + for cpu_id in config.cpus().iter() { + if let Some(existing_zone) = get_cpu_data(*cpu_id as _).zone.clone() { + return hv_result_err!( + EBUSY, + format!( + "Failed to create zone: cpu {} already belongs to zone {}", + cpu_id, + existing_zone.id() + ) + ); + } + 
zone.write().cpu_set_mut().set_bit(*cpu_id as _); + cpu_num += 1; + } + zone.write().set_cpu_num(cpu_num); + #[cfg(feature = "pci")] { - let _ = zone.virtual_pci_mmio_init(&config.pci_config, config.num_pci_bus as usize); - let _ = zone.guest_pci_init( - zone_id, - &config.alloc_pci_devs, - config.num_pci_devs, - &config.pci_config, - config.num_pci_bus as usize, - ); + #[cfg(feature = "pci_init_delay")] + { + #[cfg(feature = "dwc_pcie")] + { + let num_pci_bus = config.num_pci_bus as usize; + if zone_id == 0 { + let mut inner = zone.write(); + inner.virtual_pci_dbi_pref_init(&config.pci_config, num_pci_bus); + } else { + let _ = zone.virtual_pci_mmio_init(&config.pci_config, num_pci_bus); + let _ = zone.guest_pci_init( + zone_id, + &config.alloc_pci_devs, + config.num_pci_devs, + &config.pci_config, + num_pci_bus, + ); + } + } + } + + #[cfg(all(feature = "pci", not(feature = "pci_init_delay")))] + { + let _ = zone.virtual_pci_mmio_init(&config.pci_config, config.num_pci_bus as usize); + let _ = zone.guest_pci_init( + zone_id, + &config.alloc_pci_devs, + config.num_pci_devs, + &config.pci_config, + config.num_pci_bus as usize, + ); + } } #[cfg(feature = "viommu")] @@ -419,22 +873,6 @@ pub fn zone_create(config: &HvZoneConfig) -> HvResult> { // config.pci_config[0].ecam_size as _, // )?; - let mut cpu_num = 0; - for cpu_id in config.cpus().iter() { - if let Some(existing_zone) = get_cpu_data(*cpu_id as _).zone.clone() { - return hv_result_err!( - EBUSY, - format!( - "Failed to create zone: cpu {} already belongs to zone {}", - cpu_id, - existing_zone.id() - ) - ); - } - zone.write().cpu_set_mut().set_bit(*cpu_id as _); - cpu_num += 1; - } - zone.write().set_cpu_num(cpu_num); let cpu_set = zone.read().cpu_set(); info!("zone cpu_set: {:#b}", cpu_set.bitmap); diff --git a/tools/check_hv_mem_overlap.py b/tools/check_hv_mem_overlap.py index e3ffe761..07818380 100644 --- a/tools/check_hv_mem_overlap.py +++ b/tools/check_hv_mem_overlap.py @@ -19,9 +19,10 @@ import re import 
subprocess import sys +from typing import Optional -def get_symbol_value(elf_path: str, symbol: str) -> int | None: +def get_symbol_value(elf_path: str, symbol: str) -> Optional[int]: """Read a symbol value from the ELF using rust-nm.""" result = subprocess.run( ["rust-nm", elf_path],