diff options
-rw-r--r-- | hw/misc/vfio.c | 139 |
1 files changed, 134 insertions, 5 deletions
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index b170fd245d..7437c2e3c3 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -160,10 +160,18 @@ typedef struct VFIOContainer { }; void (*release)(struct VFIOContainer *); } iommu_data; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOGroup) group_list; QLIST_ENTRY(VFIOContainer) next; } VFIOContainer; +typedef struct VFIOGuestIOMMU { + VFIOContainer *container; + MemoryRegion *iommu; + Notifier n; + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; +} VFIOGuestIOMMU; + /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ typedef struct VFIOMSIXInfo { uint8_t table_bar; @@ -2383,7 +2391,8 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, static bool vfio_listener_skipped_section(MemoryRegionSection *section) { - return !memory_region_is_ram(section->mr) || + return (!memory_region_is_ram(section->mr) && + !memory_region_is_iommu(section->mr)) || /* * Sizing an enabled 64-bit BAR can cause spurious mappings to * addresses in the upper part of the 64-bit address space. These @@ -2393,6 +2402,65 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) section->offset_within_address_space & (1ULL << 63); } +static void vfio_iommu_map_notify(Notifier *n, void *data) +{ + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); + VFIOContainer *container = giommu->container; + IOMMUTLBEntry *iotlb = data; + MemoryRegion *mr; + hwaddr xlat; + hwaddr len = iotlb->addr_mask + 1; + void *vaddr; + int ret; + + DPRINTF("iommu map @ %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", + iotlb->iova, iotlb->iova + iotlb->addr_mask); + + /* + * The IOMMU TLB entry we have just covers translation through + * this IOMMU to its immediate target. We need to translate + * it the rest of the way through to memory. + */ + mr = address_space_translate(&address_space_memory, + iotlb->translated_addr, + &xlat, &len, iotlb->perm & IOMMU_WO); + if (!memory_region_is_ram(mr)) { + DPRINTF("iommu map to non memory area %"HWADDR_PRIx"\n", + xlat); + return; + } + /* + * Translation truncates length to the IOMMU page size, + * check that it did not truncate too much. + */ + if (len & iotlb->addr_mask) { + DPRINTF("iommu has granularity incompatible with target AS\n"); + return; + } + + if (iotlb->perm != IOMMU_NONE) { + vaddr = memory_region_get_ram_ptr(mr) + xlat; + + ret = vfio_dma_map(container, iotlb->iova, + iotlb->addr_mask + 1, vaddr, + !(iotlb->perm & IOMMU_WO) || mr->readonly); + if (ret) { + error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%m)", + container, iotlb->iova, + iotlb->addr_mask + 1, vaddr, ret); + } + } else { + ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", + container, iotlb->iova, + iotlb->addr_mask + 1, ret); + } + } +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -2403,8 +2471,6 @@ static void vfio_listener_region_add(MemoryListener *listener, void *vaddr; int ret; - assert(!memory_region_is_iommu(section->mr)); - if (vfio_listener_skipped_section(section)) { DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", section->offset_within_address_space, @@ -2428,15 +2494,57 @@ static void vfio_listener_region_add(MemoryListener *listener, return; } + memory_region_ref(section->mr); + + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + + DPRINTF("region_add [iommu] %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", + iova, int128_get64(int128_sub(llend, int128_one()))); + /* + * FIXME: We should do some checking to see if the + * capabilities of the host VFIO IOMMU are adequate to model + * the guest IOMMU + * + * FIXME: For VFIO iommu types which have KVM acceleration to + * avoid bouncing all map/unmaps through qemu this way, this + * would be the right place to wire that up (tell the KVM + * device emulation the VFIO iommu handles to use). + */ + /* + * This assumes that the guest IOMMU is empty of + * mappings at this point. + * + * One way of doing this is: + * 1. Avoid sharing IOMMUs between emulated devices or different + * IOMMU groups. + * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if + * there are some mappings in IOMMU. + * + * VFIO on SPAPR does that. Other IOMMU models may do that different, + * they must make sure there are no existing mappings or + * loop through existing mappings to map them into VFIO. + */ + giommu = g_malloc0(sizeof(*giommu)); + giommu->iommu = section->mr; + giommu->container = container; + giommu->n.notify = vfio_iommu_map_notify; + QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); + memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); + + return; + } + + /* Here we assume that memory_region_is_ram(section->mr)==true */ + end = int128_get64(llend); vaddr = memory_region_get_ram_ptr(section->mr) + section->offset_within_region + (iova - section->offset_within_address_space); - DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", + DPRINTF("region_add [ram] %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", iova, end - 1, vaddr); - memory_region_ref(section->mr); ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); if (ret) { error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " @@ -2480,6 +2588,27 @@ static void vfio_listener_region_del(MemoryListener *listener, return; } + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + + QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { + if (giommu->iommu == section->mr) { + memory_region_unregister_iommu_notifier(&giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + break; + } + } + + /* + * FIXME: We assume the one big unmap below is adequate to + * remove any individual page mappings in the IOMMU which + * might have been copied into VFIO. This works for a page table + * based IOMMU where a big unmap flattens a large range of IO-PTEs. + * That may not be true for all IOMMU types. + */ + } + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); end = (section->offset_within_address_space + int128_get64(section->size)) & TARGET_PAGE_MASK; |