From b5832c530fcc7bde7048657f10f19f4fe15d6c6e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Dec 2010 20:51:43 -0200 Subject: [RHEL6 qemu-kvm PATCH 18/18] device-assignment: pass through and stub more PCI caps RH-Author: Alex Williamson Message-id: <20101207205143.4625.7820.stgit@s20.home> Patchwork-id: 14385 O-Subject: [RHEL6.1 qemu-kvm PATCH 18/18] device-assignment: pass through and stub more PCI caps Bugzilla: 624790 RH-Acked-by: Don Dutile RH-Acked-by: Michael S. Tsirkin RH-Acked-by: Jes Sorensen Upstream commit: cb1983b8809d0e06a97384a40bad1194a32fc814 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=624790 Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=2948901 Some drivers depend on finding capabilities like power management, PCI express/X, vital product data, or vendor specific fields. Now that we have better capability support, we can pass more of these tables through to the guest. Note that VPD and VNDR are direct pass through capabilies, the rest are mostly empty shells with a few writable bits where necessary. It may be possible to consolidate dummy capabilities into common files for other drivers to use, but I prefer to leave them here for now as we figure out what bits to handle directly with hardware and what bits are purely emulated. Signed-off-by: Alex Williamson Signed-off-by: Avi Kivity --- hw/device-assignment.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 256 insertions(+), 7 deletions(-) Signed-off-by: Eduardo Habkost --- hw/device-assignment.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 256 insertions(+), 7 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index ed59fa3..54e105f 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -67,6 +67,9 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len); +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len); + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -418,11 +421,32 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) return (uint8_t)assigned_dev_pci_read(d, pos, 1); } -static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) +{ + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pwrite(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } + + return; +} + +static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) { int id; int max_cap = 48; - int pos = PCI_CAPABILITY_LIST; + int pos = start ? start : PCI_CAPABILITY_LIST; int status; status = assigned_dev_pci_read_byte(d, PCI_STATUS); @@ -501,10 +525,16 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, ssize_t ret; AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { + val = assigned_device_pci_cap_read_config(d, address, len); + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); + return val; + } + if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) || (address >= 0x10 && address <= 0x24) || address == 0x30 || - address == 0x34 || address == 0x3c || address == 0x3d || - (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address])) { + address == 0x34 || address == 0x3c || address == 0x3d) { val = pci_default_read_config(d, address, len); DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); @@ -1221,7 +1251,70 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) #endif #endif -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, +/* There can be multiple VNDR capabilities per device, we need to find the + * one that starts closet to the given address without going over. */ +static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address) +{ + uint8_t cap, pos; + + for (cap = pos = 0; + (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); + pos += PCI_CAP_LIST_NEXT) { + if (pos <= address) { + cap = MAX(pos, cap); + } + } + return cap; +} + +/* Merge the bits set in mask from mval into val. Both val and mval are + * at the same addr offset, pos is the starting offset of the mask. */ +static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr, + int len, uint8_t pos, uint32_t mask) +{ + if (!ranges_overlap(addr, len, pos, 4)) { + return val; + } + + if (addr >= pos) { + mask >>= (addr - pos) * 8; + } else { + mask <<= (pos - addr) * 8; + } + mask &= 0xffffffffU >> (4 - len) * 8; + + val &= ~mask; + val |= (mval & mask); + + return val; +} + +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + uint8_t cap, cap_id = pci_dev->config_map[address]; + uint32_t val; + + switch (cap_id) { + + case PCI_CAP_ID_VPD: + cap = pci_find_capability(pci_dev, cap_id); + val = assigned_dev_pci_read(pci_dev, address, len); + return merge_bits(val, pci_get_long(pci_dev->config + address), + address, len, cap + PCI_CAP_LIST_NEXT, 0xff); + + case PCI_CAP_ID_VNDR: + cap = find_vndr_start(pci_dev, address); + val = assigned_dev_pci_read(pci_dev, address, len); + return merge_bits(val, pci_get_long(pci_dev->config + address), + address, len, cap + PCI_CAP_LIST_NEXT, 0xff); + } + + return pci_default_read_config(pci_dev, address, len); +} + +static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len) { uint8_t cap_id = pci_dev->config_map[address]; @@ -1251,6 +1344,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t ad #endif break; #endif + + case PCI_CAP_ID_VPD: + case PCI_CAP_ID_VNDR: + assigned_dev_pci_write(pci_dev, address, val, len); + break; } } @@ -1270,7 +1368,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #ifdef KVM_CAP_DEVICE_MSI /* Expose MSI capability * MSI capability is the 1st capability in capability config */ - if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI))) { + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0))) { dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; /* Only 32-bit/no-mask currently supported */ if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10)) < 0) { @@ -1292,7 +1390,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #endif #ifdef KVM_CAP_DEVICE_MSIX /* Expose MSI-X capability */ - if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX))) { + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0))) { int bar_nr; uint32_t msix_table_entry; @@ -1317,6 +1415,157 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #endif #endif + /* Minimal PM support, nothing writable, device appears to NAK changes */ + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0))) { + uint16_t pmc; + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, + PCI_PM_SIZEOF)) < 0) { + return ret; + } + + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); + pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); + pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); + + /* assign_device will bring the device up to D0, so we don't need + * to worry about doing that ourselves here. */ + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RST); + + pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); + pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0))) { + uint8_t version; + uint16_t type, devctl, lnkcap, lnksta; + uint32_t devcap; + int size = 0x3c; /* version 2 size */ + + version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS); + version &= PCI_EXP_FLAGS_VERS; + if (version == 1) { + size = 0x14; + } else if (version > 2) { + fprintf(stderr, "Unsupported PCI express capability version %d\n", + version); + return -EINVAL; + } + + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP, + pos, size)) < 0) { + return ret; + } + + type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); + type = (type & PCI_EXP_FLAGS_TYPE) >> 8; + if (type != PCI_EXP_TYPE_ENDPOINT && + type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) { + fprintf(stderr, + "Device assignment only supports endpoint assignment, " + "device type %d\n", type); + return -EINVAL; + } + + /* capabilities, pass existing read-only copy + * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ + + /* device capabilities: hide FLR */ + devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); + devcap &= ~PCI_EXP_DEVCAP_FLR; + pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); + + /* device control: clear all error reporting enable bits, leaving + * leaving only a few host values. Note, these are + * all writable, but not passed to hw. + */ + devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); + devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | + PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); + devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; + pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); + + /* Clear device status */ + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); + + /* Link capabilities, expose links and latencues, clear reporting */ + lnkcap = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKCAP); + lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | + PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | + PCI_EXP_LNKCAP_L1EL); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); + pci_set_word(pci_dev->wmask + pos + PCI_EXP_LNKCAP, + PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_RCB | + PCI_EXP_LNKCTL_CCC | PCI_EXP_LNKCTL_ES | + PCI_EXP_LNKCTL_CLKREQ_EN | PCI_EXP_LNKCTL_HAWD); + + /* Link control, pass existing read-only copy. Should be writable? */ + + /* Link status, only expose current speed and width */ + lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); + lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); + + if (version >= 2) { + /* Slot capabilities, control, status - not needed for endpoints */ + pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); + + /* Root control, capabilities, status - not needed for endpoints */ + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); + pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); + + /* Device capabilities/control 2, pass existing read-only copy */ + /* Link control 2, pass existing read-only copy */ + } + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0))) { + uint16_t cmd; + uint32_t status; + + /* Only expose the minimum, 8 byte capability */ + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8)) < 0) { + return ret; + } + + /* Command register, clear upper bits, including extended modes */ + cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); + cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | + PCI_X_CMD_MAX_SPLIT); + pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); + + /* Status register, update with emulated PCI bus location, clear + * error bits, leave the rest. */ + status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); + status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); + status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn; + status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | + PCI_X_STATUS_SPL_ERR); + pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0))) { + /* Direct R/W passthrough */ + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) { + return ret; + } + } + + /* Devices can have multiple vendor capabilities, get them all */ + for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); + pos += PCI_CAP_LIST_NEXT) { + uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); + /* Direct R/W passthrough */ + if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, + pos, len)) < 0) { + return ret; + } + } + return 0; } -- 1.7.3.2