76 files changed, 2087 insertions, 1317 deletions
diff --git a/.gitignore b/.gitignore index 0c5af83aa7..90acb4347d 100644 --- a/.gitignore +++ b/.gitignore @@ -34,18 +34,18 @@ /qapi/qapi-builtin-types.[ch] /qapi/qapi-builtin-visit.[ch] /qapi/qapi-commands-*.[ch] -/qapi/qapi-commands.[ch] -/qapi/qapi-emit-events.[ch] +**/qapi/qapi-commands.[ch] +**/qapi/qapi-emit-events.[ch] /qapi/qapi-events-*.[ch] -/qapi/qapi-events.[ch] -/qapi/qapi-init-commands.[ch] -/qapi/qapi-introspect.[ch] +**/qapi/qapi-events.[ch] +**/qapi/qapi-init-commands.[ch] +**/qapi/qapi-introspect.[ch] /qapi/qapi-types-*.[ch] -/qapi/qapi-types.[ch] +**/qapi/qapi-types.[ch] /qapi/qapi-visit-*.[ch] !/qapi/qapi-visit-core.c -/qapi/qapi-visit.[ch] -/qapi/qapi-doc.texi +**/qapi/qapi-visit.[ch] +**/qapi/qapi-doc.texi /qemu-edid /qemu-img /qemu-nbd @@ -59,6 +59,7 @@ /qemu-keymap /qemu-monitor.texi /qemu-monitor-info.texi +/qemu-storage-daemon /qemu-version.h /qemu-version.h.tmp /module_block.h diff --git a/Makefile.objs b/Makefile.objs index c09d95dfe3..7ce2588b89 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -13,7 +13,7 @@ chardev-obj-y = chardev/ authz-obj-y = authz/ -block-obj-y = block/ block/monitor/ nbd/ scsi/ +block-obj-y = block/ nbd/ scsi/ block-obj-y += block.o blockjob.o job.o block-obj-y += qemu-io-cmds.o block-obj-$(CONFIG_REPLICATION) += replication.o diff --git a/block/Makefile.objs b/block/Makefile.objs index 3635b6b4c1..96028eedce 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -46,6 +46,7 @@ block-obj-y += aio_task.o block-obj-y += backup-top.o block-obj-y += filter-compress.o common-obj-y += monitor/ +block-obj-y += monitor/ block-obj-y += stream.o diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 7bf12502da..1f38806ca6 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1757,19 +1757,20 @@ bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) } /* - * Compute the space required for bitmaps in @bs. + * Compute the space required to copy bitmaps from @in_bs. * * The computation is based as if copying to a new image with the - * given @cluster_size, which may differ from the cluster size in @bs. + * given @cluster_size, which may differ from the cluster size in + * @in_bs; in fact, @in_bs might be something other than qcow2. 
*/ -uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, +uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *in_bs, uint32_t cluster_size) { uint64_t bitmaps_size = 0; BdrvDirtyBitmap *bm; size_t bitmap_dir_size = 0; - FOR_EACH_DIRTY_BITMAP(bs, bm) { + FOR_EACH_DIRTY_BITMAP(in_bs, bm) { if (bdrv_dirty_bitmap_get_persistence(bm)) { const char *name = bdrv_dirty_bitmap_name(bm); uint32_t granularity = bdrv_dirty_bitmap_granularity(bm); diff --git a/chardev/char.c b/chardev/char.c index ea06c5ff4d..e3051295ac 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -38,6 +38,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/id.h" +#include "qemu/coroutine.h" #include "chardev/char-mux.h" @@ -119,7 +120,11 @@ static int qemu_chr_write_buffer(Chardev *s, retry: res = cc->chr_write(s, buf + *offset, len - *offset); if (res < 0 && errno == EAGAIN && write_all) { - g_usleep(100); + if (qemu_in_coroutine()) { + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + } else { + g_usleep(100); + } goto retry; } @@ -97,6 +97,11 @@ do_cxx() { do_compiler "$cxx" "$@" } +# Append $2 to the variable named $1, with space separation +add_to() { + eval $1=\${$1:+\"\$$1 \"}\$2 +} + update_cxxflags() { # Set QEMU_CXXFLAGS from QEMU_CFLAGS by filtering out those # options which some versions of GCC's C++ compiler complain about @@ -2024,16 +2029,35 @@ if ! compile_prog "" "" ; then error_exit "You need at least GCC v4.8 or Clang v3.4 (or XCode Clang v5.1)" fi -gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits" -gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags" -gcc_flags="-Wno-missing-include-dirs -Wempty-body -Wnested-externs $gcc_flags" -gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags" -gcc_flags="-Wno-initializer-overrides -Wexpansion-to-defined $gcc_flags" -gcc_flags="-Wno-string-plus-int -Wno-typedef-redefinition $gcc_flags" -# Note that we do not add -Werror to gcc_flags here, because that would -# enable it for all configure tests. If a configure test failed due -# to -Werror this would just silently disable some features, -# so it's too error prone. +# Accumulate -Wfoo and -Wno-bar separately. +# We will list all of the enable flags first, and the disable flags second. +# Note that we do not add -Werror, because that would enable it for all +# configure tests. If a configure test failed due to -Werror this would +# just silently disable some features, so it's too error prone. 
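A note on the chardev/char.c hunk above: qemu_chr_write_buffer() keeps retrying partial writes, and the patch makes the EAGAIN back-off coroutine-aware, yielding with qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000) instead of blocking the thread in g_usleep(100). The sketch below is a simplified, self-contained stand-in for that retry loop (plain POSIX I/O, with nanosleep standing in for both sleep variants); it is not the QEMU implementation, just the shape of it.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Write the whole buffer, sleeping ~100us whenever the fd reports EAGAIN.
 * In QEMU the sleep primitive depends on context: g_usleep(100) from a
 * plain thread, qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000) from a
 * coroutine, so the event loop keeps running while we wait. */
static int write_all(int fd, const char *buf, size_t len)
{
    size_t offset = 0;

    while (offset < len) {
        ssize_t res = write(fd, buf + offset, len - offset);
        if (res < 0) {
            if (errno == EAGAIN) {
                struct timespec ts = { .tv_sec = 0, .tv_nsec = 100000 };
                nanosleep(&ts, NULL);
                continue;
            }
            return -1;
        }
        offset += res;
    }
    return 0;
}

int main(void)
{
    int fds[2];
    const char msg[] = "hello from write_all";
    char out[sizeof(msg)] = "";

    if (pipe(fds) < 0) {
        return 1;
    }
    fcntl(fds[1], F_SETFL, O_NONBLOCK);   /* makes EAGAIN possible */

    if (write_all(fds[1], msg, sizeof(msg)) < 0) {
        perror("write_all");
        return 1;
    }
    if (read(fds[0], out, sizeof(out)) > 0) {
        printf("read back: %s\n", out);
    }
    return 0;
}

The point of the QEMU change is only the sleep primitive: blocking in g_usleep() from a coroutine would stall the whole event loop, while qemu_co_sleep_ns() yields and lets other coroutines make progress.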
+ +warn_flags= +add_to warn_flags -Wold-style-declaration +add_to warn_flags -Wold-style-definition +add_to warn_flags -Wtype-limits +add_to warn_flags -Wformat-security +add_to warn_flags -Wformat-y2k +add_to warn_flags -Winit-self +add_to warn_flags -Wignored-qualifiers +add_to warn_flags -Wempty-body +add_to warn_flags -Wnested-externs +add_to warn_flags -Wendif-labels +add_to warn_flags -Wexpansion-to-defined + +nowarn_flags= +add_to nowarn_flags -Wno-initializer-overrides +add_to nowarn_flags -Wno-missing-include-dirs +add_to nowarn_flags -Wno-shift-negative-value +add_to nowarn_flags -Wno-string-plus-int +add_to nowarn_flags -Wno-typedef-redefinition +add_to nowarn_flags -Wno-tautological-type-limit-compare +add_to nowarn_flags -Wno-psabi + +gcc_flags="$warn_flags $nowarn_flags" cc_has_warning_flag() { write_c_skeleton; diff --git a/dma-helpers.c b/dma-helpers.c index e8a26e81e1..2a77b5a9cb 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -13,6 +13,8 @@ #include "trace-root.h" #include "qemu/thread.h" #include "qemu/main-loop.h" +#include "sysemu/cpus.h" +#include "qemu/range.h" /* #define DEBUG_IOMMU */ @@ -142,6 +144,26 @@ static void dma_blk_cb(void *opaque, int ret) cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir); + /* + * Make reads deterministic in icount mode. Windows sometimes issues + * disk read requests with overlapping SGs. It leads + * to non-determinism, because resulting buffer contents may be mixed + * from several sectors. This code splits all SGs into several + * groups. SGs in every group do not overlap. + */ + if (mem && use_icount && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { + int i; + for (i = 0 ; i < dbs->iov.niov ; ++i) { + if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, + dbs->iov.iov[i].iov_len, (intptr_t)mem, + cur_len)) { + dma_memory_unmap(dbs->sg->as, mem, cur_len, + dbs->dir, cur_len); + mem = NULL; + break; + } + } + } if (!mem) break; qemu_iovec_add(&dbs->iov, mem, cur_len); diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst index 544ece0a45..3a255591c3 100644 --- a/docs/system/deprecated.rst +++ b/docs/system/deprecated.rst @@ -47,12 +47,6 @@ The 'file' driver for drives is no longer appropriate for character or host devices and will only accept regular files (S_IFREG). The correct driver for these file types is 'host_cdrom' or 'host_device' as appropriate. -``-net ...,name=``\ *name* (since 3.1) -'''''''''''''''''''''''''''''''''''''' - -The ``name`` parameter of the ``-net`` option is a synonym -for the ``id`` parameter, which should now be used instead. - ``-smp`` (invalid topologies) (since 3.1) ''''''''''''''''''''''''''''''''''''''''' @@ -441,6 +435,15 @@ What follows is a record of recently removed, formerly deprecated features that serves as a record for users who have encountered trouble after a recent upgrade. +System emulator command line arguments +-------------------------------------- + +``-net ...,name=``\ *name* (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''''' + +The ``name`` parameter of the ``-net`` option was a synonym +for the ``id`` parameter, which should now be used instead. + QEMU Machine Protocol (QMP) commands ------------------------------------ diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt index b431bdaf0f..6bd1828f34 100644 --- a/docs/xbzrle.txt +++ b/docs/xbzrle.txt @@ -112,10 +112,12 @@ is recommended. 
cache size: H bytes xbzrle transferred: I kbytes xbzrle pages: J pages - xbzrle cache miss: K - xbzrle overflow: L + xbzrle cache miss: K pages + xbzrle cache miss rate: L + xbzrle encoding rate: M + xbzrle overflow: N -xbzrle cache-miss: the number of cache misses to date - high cache-miss rate +xbzrle cache miss: the number of cache misses to date - high cache-miss rate indicates that the cache size is set too low. xbzrle overflow: the number of overflows in the decoding which where the delta could not be compressed. This can happen if the changes in the pages are too diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 6c8f2d597a..5e9746c287 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -3362,7 +3362,9 @@ static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + absZ &= ~1; + } z = absZ; if ( zSign ) z = - z; if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { @@ -3420,7 +3422,9 @@ static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, if ( increment ) { ++absZ0; if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } z = absZ0; if ( zSign ) z = - z; @@ -3480,7 +3484,9 @@ static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0, float_raise(float_flag_invalid, status); return UINT64_MAX; } - absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } if (zSign && absZ0) { @@ -3603,7 +3609,9 @@ static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat32( zSign, zExp, zSig ); @@ -3757,7 +3765,9 @@ static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>10; - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x200) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat64( zSign, zExp, zSig ); @@ -3983,8 +3993,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, } if ( increment ) { ++zSig0; - zSig0 &= - ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } if ( (int64_t) zSig0 < 0 ) zExp = 1; } return packFloatx80( zSign, zExp, zSig0 ); @@ -4000,7 +4011,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, zSig0 = UINT64_C(0x8000000000000000); } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } } } else { @@ -4270,7 +4283,9 @@ static float128 roundAndPackFloat128(bool zSign, int32_t zExp, } if ( increment ) { add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + if ((zSig2 + zSig2 == 0) && roundNearestEven) { + zSig1 &= ~1; + } } else { if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; diff --git a/hmp-commands.hx b/hmp-commands.hx index 28256209b5..60f395c276 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1806,9 
+1806,10 @@ ERST { .name = "qom-set", - .args_type = "path:s,property:s,value:S", - .params = "path property value", - .help = "set QOM property", + .args_type = "json:-j,path:s,property:s,value:S", + .params = "[-j] path property value", + .help = "set QOM property.\n\t\t\t" + "-j: the value is specified in json format.", .cmd = hmp_qom_set, .flags = "p", }, diff --git a/hw/block/block.c b/hw/block/block.c index bf56c7612b..1e34573da7 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -61,7 +61,7 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size, return true; } -void blkconf_blocksizes(BlockConf *conf) +bool blkconf_blocksizes(BlockConf *conf, Error **errp) { BlockBackend *blk = conf->blk; BlockSizes blocksizes; @@ -83,6 +83,44 @@ void blkconf_blocksizes(BlockConf *conf) conf->logical_block_size = BDRV_SECTOR_SIZE; } } + + if (conf->logical_block_size > conf->physical_block_size) { + error_setg(errp, + "logical_block_size > physical_block_size not supported"); + return false; + } + + if (!QEMU_IS_ALIGNED(conf->min_io_size, conf->logical_block_size)) { + error_setg(errp, + "min_io_size must be a multiple of logical_block_size"); + return false; + } + + /* + * all devices which support min_io_size (scsi and virtio-blk) expose it to + * the guest as a uint16_t in units of logical blocks + */ + if (conf->min_io_size / conf->logical_block_size > UINT16_MAX) { + error_setg(errp, "min_io_size must not exceed %u logical blocks", + UINT16_MAX); + return false; + } + + if (!QEMU_IS_ALIGNED(conf->opt_io_size, conf->logical_block_size)) { + error_setg(errp, + "opt_io_size must be a multiple of logical_block_size"); + return false; + } + + if (conf->discard_granularity != -1 && + !QEMU_IS_ALIGNED(conf->discard_granularity, + conf->logical_block_size)) { + error_setg(errp, "discard_granularity must be " + "a multiple of logical_block_size"); + return false; + } + + return true; } bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 1b52e8159c..37499c5564 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -220,6 +220,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) goto fail_guest_notifiers; } + /* Process queued requests before the ones in vring */ + virtio_blk_process_queued_requests(vblk, false); + /* Kick right away to begin processing requests already in vring */ for (i = 0; i < nvqs; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); @@ -239,6 +242,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) return 0; fail_guest_notifiers: + /* + * If we failed to set up the guest notifiers queued requests will be + * processed on the main context. 
+ */ + virtio_blk_process_queued_requests(vblk, false); vblk->dataplane_disabled = true; s->starting = false; vblk->dataplane_started = true; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 8528b9a3c7..be0674e4aa 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -554,7 +554,10 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) read_only = !blk_bs(dev->conf.blk) || blk_is_read_only(dev->conf.blk); } - blkconf_blocksizes(&dev->conf); + if (!blkconf_blocksizes(&dev->conf, errp)) { + return; + } + if (dev->conf.logical_block_size != 512 || dev->conf.physical_block_size != 512) { diff --git a/hw/block/nvme.c b/hw/block/nvme.c index a21eeca2fb..1aee042d4c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -20,7 +20,7 @@ * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ * cmb_size_mb=<cmb_size_mb[optional]>, \ * [pmrdev=<mem_backend_file_id>,] \ - * num_queues=<N[optional]> + * max_ioqpairs=<N[optional]> * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. @@ -36,6 +36,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/error-report.h" #include "hw/block/block.h" #include "hw/pci/msix.h" #include "hw/pci/pci.h" @@ -53,6 +54,12 @@ #include "trace.h" #include "nvme.h" +#define NVME_MAX_IOQPAIRS 0xffff +#define NVME_REG_SIZE 0x1000 +#define NVME_DB_SIZE 4 +#define NVME_CMB_BIR 2 +#define NVME_PMR_BIR 2 + #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ (trace_##trace)(__VA_ARGS__); \ @@ -62,24 +69,32 @@ static void nvme_process_sq(void *opaque); +static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) +{ + hwaddr low = n->ctrl_mem.addr; + hwaddr hi = n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size); + + return addr >= low && addr < hi; +} + static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) { - if (n->cmbsz && addr >= n->ctrl_mem.addr && - addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) { + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) { memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); - } else { - pci_dma_read(&n->parent_obj, addr, buf, size); + return; } + + pci_dma_read(&n->parent_obj, addr, buf, size); } static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { - return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; + return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; } static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) { - return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1; + return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 
0 : -1; } static void nvme_inc_cq_tail(NvmeCQueue *cq) @@ -122,16 +137,16 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) { if (cq->irq_enabled) { if (msix_enabled(&(n->parent_obj))) { - trace_nvme_irq_msix(cq->vector); + trace_pci_nvme_irq_msix(cq->vector); msix_notify(&(n->parent_obj), cq->vector); } else { - trace_nvme_irq_pin(); - assert(cq->cqid < 64); - n->irq_status |= 1 << cq->cqid; + trace_pci_nvme_irq_pin(); + assert(cq->vector < 32); + n->irq_status |= 1 << cq->vector; nvme_irq_check(n); } } else { - trace_nvme_irq_masked(); + trace_pci_nvme_irq_masked(); } } @@ -141,8 +156,8 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) if (msix_enabled(&(n->parent_obj))) { return; } else { - assert(cq->cqid < 64); - n->irq_status &= ~(1 << cq->cqid); + assert(cq->vector < 32); + n->irq_status &= ~(1 << cq->vector); nvme_irq_check(n); } } @@ -156,9 +171,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, int num_prps = (len >> n->page_bits) + 1; if (unlikely(!prp1)) { - trace_nvme_err_invalid_prp(); + trace_pci_nvme_err_invalid_prp(); return NVME_INVALID_FIELD | NVME_DNR; - } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && + } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr && prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { qsg->nsg = 0; qemu_iovec_init(iov, num_prps); @@ -170,7 +185,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, len -= trans_len; if (len) { if (unlikely(!prp2)) { - trace_nvme_err_invalid_prp2_missing(); + trace_pci_nvme_err_invalid_prp2_missing(); goto unmap; } if (len > n->page_size) { @@ -186,7 +201,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, if (i == n->max_prp_ents - 1 && len > n->page_size) { if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { - trace_nvme_err_invalid_prplist_ent(prp_ent); + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); goto unmap; } @@ -199,7 +214,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, } if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { - trace_nvme_err_invalid_prplist_ent(prp_ent); + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); goto unmap; } @@ -214,7 +229,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, } } else { if (unlikely(prp2 & (n->page_size - 1))) { - trace_nvme_err_invalid_prp2_align(prp2); + trace_pci_nvme_err_invalid_prp2_align(prp2); goto unmap; } if (qsg->nsg) { @@ -262,20 +277,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, QEMUIOVector iov; uint16_t status = NVME_SUCCESS; - trace_nvme_dma_read(prp1, prp2); + trace_pci_nvme_dma_read(prp1, prp2); if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { return NVME_INVALID_FIELD | NVME_DNR; } if (qsg.nsg > 0) { if (unlikely(dma_buf_read(ptr, len, &qsg))) { - trace_nvme_err_invalid_dma(); + trace_pci_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } qemu_sglist_destroy(&qsg); } else { if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { - trace_nvme_err_invalid_dma(); + trace_pci_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } qemu_iovec_destroy(&iov); @@ -364,7 +379,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, uint32_t count = nlb << data_shift; if (unlikely(slba + nlb > ns->id_ns.nsze)) { - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return 
NVME_LBA_RANGE | NVME_DNR; } @@ -392,11 +407,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; - trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); + trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); if (unlikely((slba + nlb) > ns->id_ns.nsze)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -431,7 +446,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t nsid = le32_to_cpu(cmd->nsid); if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } @@ -445,7 +460,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); default: - trace_nvme_err_invalid_opc(cmd->opcode); + trace_pci_nvme_err_invalid_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -470,11 +485,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qid = le16_to_cpu(c->qid); if (unlikely(!qid || nvme_check_sqid(n, qid))) { - trace_nvme_err_invalid_del_sq(qid); + trace_pci_nvme_err_invalid_del_sq(qid); return NVME_INVALID_QID | NVME_DNR; } - trace_nvme_del_sq(qid); + trace_pci_nvme_del_sq(qid); sq = n->sq[qid]; while (!QTAILQ_EMPTY(&sq->out_req_list)) { @@ -538,26 +553,26 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->sq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); + trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { - trace_nvme_err_invalid_create_sq_cqid(cqid); + trace_pci_nvme_err_invalid_create_sq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { - trace_nvme_err_invalid_create_sq_sqid(sqid); + trace_pci_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_nvme_err_invalid_create_sq_size(qsize); + trace_pci_nvme_err_invalid_create_sq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { - trace_nvme_err_invalid_create_sq_addr(prp1); + trace_pci_nvme_err_invalid_create_sq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { - trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); + trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); return NVME_INVALID_FIELD | NVME_DNR; } sq = g_malloc0(sizeof(*sq)); @@ -583,17 +598,17 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qid = le16_to_cpu(c->qid); if (unlikely(!qid || nvme_check_cqid(n, qid))) { - trace_nvme_err_invalid_del_cq_cqid(qid); + trace_pci_nvme_err_invalid_del_cq_cqid(qid); return NVME_INVALID_CQID | NVME_DNR; } cq = n->cq[qid]; if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { - trace_nvme_err_invalid_del_cq_notempty(qid); + trace_pci_nvme_err_invalid_del_cq_notempty(qid); return NVME_INVALID_QUEUE_DEL; } nvme_irq_deassert(n, cq); - trace_nvme_del_cq(qid); + trace_pci_nvme_del_cq(qid); 
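A recurring detail in the nvme.c hunks here is the switch from n->num_queues to n->params.max_ioqpairs + 1 in nvme_check_sqid()/nvme_check_cqid() and in the teardown loops: max_ioqpairs counts only the I/O queue pairs, so the admin queue pair living in slot 0 has to be added back whenever the sq[]/cq[] arrays are sized or bounds-checked. A minimal stand-alone illustration of that check, using hypothetical stand-in types rather than the QEMU structures (the real helpers return 0 on success and -1 on failure):

#include <stdbool.h>
#include <stdio.h>

#define MAX_IOQPAIRS 64   /* stand-in for n->params.max_ioqpairs */

typedef struct SQueue { int dummy; } SQueue;

typedef struct Ctrl {
    /* slot 0 is the admin queue; slots 1..MAX_IOQPAIRS are I/O queues */
    SQueue *sq[MAX_IOQPAIRS + 1];
} Ctrl;

/* Same shape as nvme_check_sqid(): the id must be in range (admin queue
 * included) and the slot must actually have been created. */
static bool sqid_valid(Ctrl *n, unsigned sqid)
{
    return sqid < MAX_IOQPAIRS + 1 && n->sq[sqid] != NULL;
}

int main(void)
{
    Ctrl n = {0};
    SQueue admin, io1;

    n.sq[0] = &admin;   /* admin submission queue */
    n.sq[1] = &io1;     /* first I/O submission queue */

    printf("sqid 0  -> %d\n", sqid_valid(&n, 0));    /* 1 */
    printf("sqid 1  -> %d\n", sqid_valid(&n, 1));    /* 1 */
    printf("sqid 2  -> %d\n", sqid_valid(&n, 2));    /* 0: never created */
    printf("sqid %d -> %d\n", MAX_IOQPAIRS + 1,
           sqid_valid(&n, MAX_IOQPAIRS + 1));        /* 0: out of range */
    return 0;
}

Sizing the arrays as max_ioqpairs + 1 (as nvme_init_state() now does) and checking qid < max_ioqpairs + 1 keeps the admin queue addressable as queue 0 without stealing one of the advertised I/O queue pairs.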
nvme_free_cq(cq, n); return NVME_SUCCESS; } @@ -601,6 +616,10 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) { + int ret; + + ret = msix_vector_use(&n->parent_obj, vector); + assert(ret == 0); cq->ctrl = n; cq->cqid = cqid; cq->size = size; @@ -611,7 +630,6 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, cq->head = cq->tail = 0; QTAILQ_INIT(&cq->req_list); QTAILQ_INIT(&cq->sq_list); - msix_vector_use(&n->parent_obj, cq->vector); n->cq[cqid] = cq; cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); } @@ -626,27 +644,31 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->cq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, - NVME_CQ_FLAGS_IEN(qflags) != 0); + trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, + NVME_CQ_FLAGS_IEN(qflags) != 0); if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { - trace_nvme_err_invalid_create_cq_cqid(cqid); + trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_nvme_err_invalid_create_cq_size(qsize); + trace_pci_nvme_err_invalid_create_cq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } if (unlikely(!prp1)) { - trace_nvme_err_invalid_create_cq_addr(prp1); + trace_pci_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } - if (unlikely(vector > n->num_queues)) { - trace_nvme_err_invalid_create_cq_vector(vector); + if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } + if (unlikely(vector >= n->params.msix_qsize)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { - trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); + trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); return NVME_INVALID_FIELD | NVME_DNR; } @@ -661,7 +683,7 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); - trace_nvme_identify_ctrl(); + trace_pci_nvme_identify_ctrl(); return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1, prp2); @@ -674,10 +696,10 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); - trace_nvme_identify_ns(nsid); + trace_pci_nvme_identify_ns(nsid); if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } @@ -689,7 +711,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) { - static const int data_len = 4 * KiB; + static const int data_len = NVME_IDENTIFY_DATA_SIZE; uint32_t min_nsid = le32_to_cpu(c->nsid); uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); @@ -697,7 +719,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) uint16_t ret; int i, j = 0; - trace_nvme_identify_nslist(min_nsid); + trace_pci_nvme_identify_nslist(min_nsid); list 
= g_malloc0(data_len); for (i = 0; i < n->num_namespaces; i++) { @@ -719,21 +741,21 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) NvmeIdentify *c = (NvmeIdentify *)cmd; switch (le32_to_cpu(c->cns)) { - case 0x00: + case NVME_ID_CNS_NS: return nvme_identify_ns(n, c); - case 0x01: + case NVME_ID_CNS_CTRL: return nvme_identify_ctrl(n, c); - case 0x02: + case NVME_ID_CNS_NS_ACTIVE_LIST: return nvme_identify_nslist(n, c); default: - trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); + trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; } } static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) { - trace_nvme_setfeat_timestamp(ts); + trace_pci_nvme_setfeat_timestamp(ts); n->host_timestamp = le64_to_cpu(ts); n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); @@ -766,7 +788,7 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) /* If the host timestamp is non-zero, set the timestamp origin */ ts.origin = n->host_timestamp ? 0x01 : 0x00; - trace_nvme_getfeat_timestamp(ts.all); + trace_pci_nvme_getfeat_timestamp(ts.all); return cpu_to_le64(ts.all); } @@ -790,17 +812,17 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); - trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); + trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: - result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); - trace_nvme_getfeat_numq(result); + result = cpu_to_le32((n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16)); + trace_pci_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); - break; default: - trace_nvme_err_invalid_getfeat(dw10); + trace_pci_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } @@ -836,19 +858,17 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: - trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, - ((dw11 >> 16) & 0xFFFF) + 1, - n->num_queues - 1, n->num_queues - 1); - req->cqe.result = - cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); + trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, + ((dw11 >> 16) & 0xFFFF) + 1, + n->params.max_ioqpairs, + n->params.max_ioqpairs); + req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16)); break; - case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); - break; - default: - trace_nvme_err_invalid_setfeat(dw10); + trace_pci_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } return NVME_SUCCESS; @@ -872,7 +892,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); default: - trace_nvme_err_invalid_admin_opc(cmd->opcode); + trace_pci_nvme_err_invalid_admin_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -914,12 +934,12 @@ static void nvme_clear_ctrl(NvmeCtrl *n) blk_drain(n->conf.blk); - for (i = 0; i < n->num_queues; i++) { + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { if (n->sq[i] != NULL) { nvme_free_sq(n->sq[i], n); } } - for (i = 0; i < n->num_queues; i++) { + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { if (n->cq[i] != NULL) { nvme_free_cq(n->cq[i], n); } @@ 
-935,77 +955,77 @@ static int nvme_start_ctrl(NvmeCtrl *n) uint32_t page_size = 1 << page_bits; if (unlikely(n->cq[0])) { - trace_nvme_err_startfail_cq(); + trace_pci_nvme_err_startfail_cq(); return -1; } if (unlikely(n->sq[0])) { - trace_nvme_err_startfail_sq(); + trace_pci_nvme_err_startfail_sq(); return -1; } if (unlikely(!n->bar.asq)) { - trace_nvme_err_startfail_nbarasq(); + trace_pci_nvme_err_startfail_nbarasq(); return -1; } if (unlikely(!n->bar.acq)) { - trace_nvme_err_startfail_nbaracq(); + trace_pci_nvme_err_startfail_nbaracq(); return -1; } if (unlikely(n->bar.asq & (page_size - 1))) { - trace_nvme_err_startfail_asq_misaligned(n->bar.asq); + trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq); return -1; } if (unlikely(n->bar.acq & (page_size - 1))) { - trace_nvme_err_startfail_acq_misaligned(n->bar.acq); + trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq); return -1; } if (unlikely(NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap))) { - trace_nvme_err_startfail_page_too_small( + trace_pci_nvme_err_startfail_page_too_small( NVME_CC_MPS(n->bar.cc), NVME_CAP_MPSMIN(n->bar.cap)); return -1; } if (unlikely(NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap))) { - trace_nvme_err_startfail_page_too_large( + trace_pci_nvme_err_startfail_page_too_large( NVME_CC_MPS(n->bar.cc), NVME_CAP_MPSMAX(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { - trace_nvme_err_startfail_cqent_too_small( + trace_pci_nvme_err_startfail_cqent_too_small( NVME_CC_IOCQES(n->bar.cc), NVME_CTRL_CQES_MIN(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { - trace_nvme_err_startfail_cqent_too_large( + trace_pci_nvme_err_startfail_cqent_too_large( NVME_CC_IOCQES(n->bar.cc), NVME_CTRL_CQES_MAX(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { - trace_nvme_err_startfail_sqent_too_small( + trace_pci_nvme_err_startfail_sqent_too_small( NVME_CC_IOSQES(n->bar.cc), NVME_CTRL_SQES_MIN(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { - trace_nvme_err_startfail_sqent_too_large( + trace_pci_nvme_err_startfail_sqent_too_large( NVME_CC_IOSQES(n->bar.cc), NVME_CTRL_SQES_MAX(n->bar.cap)); return -1; } if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { - trace_nvme_err_startfail_asqent_sz_zero(); + trace_pci_nvme_err_startfail_asqent_sz_zero(); return -1; } if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { - trace_nvme_err_startfail_acqent_sz_zero(); + trace_pci_nvme_err_startfail_acqent_sz_zero(); return -1; } @@ -1028,14 +1048,14 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, unsigned size) { if (unlikely(offset & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, "MMIO write not 32-bit aligned," " offset=0x%"PRIx64"", offset); /* should be ignored, fall through for now */ } if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall, "MMIO write smaller than 32-bits," " offset=0x%"PRIx64", size=%u", offset, size); @@ -1045,32 +1065,30 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, switch (offset) { case 0xc: /* INTMS */ if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask set" " 
when MSI-X is enabled"); /* should be ignored, fall through for now */ } n->bar.intms |= data & 0xffffffff; n->bar.intmc = n->bar.intms; - trace_nvme_mmio_intm_set(data & 0xffffffff, - n->bar.intmc); + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); nvme_irq_check(n); break; case 0x10: /* INTMC */ if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask clr" " when MSI-X is enabled"); /* should be ignored, fall through for now */ } n->bar.intms &= ~(data & 0xffffffff); n->bar.intmc = n->bar.intms; - trace_nvme_mmio_intm_clr(data & 0xffffffff, - n->bar.intmc); + trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); nvme_irq_check(n); break; case 0x14: /* CC */ - trace_nvme_mmio_cfg(data & 0xffffffff); + trace_pci_nvme_mmio_cfg(data & 0xffffffff); /* Windows first sends data, then sends enable bit */ if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) @@ -1081,42 +1099,42 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { n->bar.cc = data; if (unlikely(nvme_start_ctrl(n))) { - trace_nvme_err_startfail(); + trace_pci_nvme_err_startfail(); n->bar.csts = NVME_CSTS_FAILED; } else { - trace_nvme_mmio_start_success(); + trace_pci_nvme_mmio_start_success(); n->bar.csts = NVME_CSTS_READY; } } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { - trace_nvme_mmio_stopped(); + trace_pci_nvme_mmio_stopped(); nvme_clear_ctrl(n); n->bar.csts &= ~NVME_CSTS_READY; } if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { - trace_nvme_mmio_shutdown_set(); + trace_pci_nvme_mmio_shutdown_set(); nvme_clear_ctrl(n); n->bar.cc = data; n->bar.csts |= NVME_CSTS_SHST_COMPLETE; } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { - trace_nvme_mmio_shutdown_cleared(); + trace_pci_nvme_mmio_shutdown_cleared(); n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; n->bar.cc = data; } break; case 0x1C: /* CSTS */ if (data & (1 << 4)) { - NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, "attempted to W1C CSTS.NSSRO" " but CAP.NSSRS is zero (not supported)"); } else if (data != 0) { - NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts, "attempted to set a read only bit" " of controller status"); } break; case 0x20: /* NSSR */ if (data == 0x4E564D65) { - trace_nvme_ub_mmiowr_ssreset_unsupported(); + trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { /* The spec says that writes of other values have no effect */ return; @@ -1124,55 +1142,55 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, break; case 0x24: /* AQA */ n->bar.aqa = data & 0xffffffff; - trace_nvme_mmio_aqattr(data & 0xffffffff); + trace_pci_nvme_mmio_aqattr(data & 0xffffffff); break; case 0x28: /* ASQ */ n->bar.asq = data; - trace_nvme_mmio_asqaddr(data); + trace_pci_nvme_mmio_asqaddr(data); break; case 0x2c: /* ASQ hi */ n->bar.asq |= data << 32; - trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); + trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); break; case 0x30: /* ACQ */ - trace_nvme_mmio_acqaddr(data); + trace_pci_nvme_mmio_acqaddr(data); n->bar.acq = data; break; case 0x34: /* ACQ hi */ n->bar.acq |= data << 32; - trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); + trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); break; case 0x38: /* CMBLOC */ - 
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, "invalid write to reserved CMBLOC" " when CMBSZ is zero, ignored"); return; case 0x3C: /* CMBSZ */ - NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; case 0xE00: /* PMRCAP */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; case 0xE04: /* TODO PMRCTL */ break; case 0xE08: /* PMRSTS */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; case 0xE0C: /* PMREBS */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; case 0xE10: /* PMRSWTP */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; case 0xE14: /* TODO PMRMSC */ break; default: - NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, "invalid MMIO write," " offset=0x%"PRIx64", data=%"PRIx64"", offset, data); @@ -1187,12 +1205,12 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) uint64_t val = 0; if (unlikely(addr & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, + NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32, "MMIO read not 32-bit aligned," " offset=0x%"PRIx64"", addr); /* should RAZ, fall through for now */ } else if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, + NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall, "MMIO read smaller than 32-bits," " offset=0x%"PRIx64"", addr); /* should RAZ, fall through for now */ @@ -1210,7 +1228,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) } memcpy(&val, ptr + addr, size); } else { - NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, + NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, "MMIO read beyond last register," " offset=0x%"PRIx64", returning 0", addr); } @@ -1223,7 +1241,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) uint32_t qid; if (unlikely(addr & ((1 << 2) - 1))) { - NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, "doorbell write not 32-bit aligned," " offset=0x%"PRIx64", ignoring", addr); return; @@ -1238,7 +1256,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) qid = (addr - (0x1000 + (1 << 2))) >> 3; if (unlikely(nvme_check_cqid(n, qid))) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq, "completion queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); @@ -1247,7 +1265,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) cq = n->cq[qid]; if (unlikely(new_head >= cq->size)) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead, "completion queue doorbell write value" " beyond queue size, sqid=%"PRIu32"," " new_head=%"PRIu16", ignoring", @@ -1276,7 +1294,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) qid = (addr - 0x1000) >> 3; if (unlikely(nvme_check_sqid(n, qid))) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq, "submission queue doorbell write" " for nonexistent queue," " 
sqid=%"PRIu32", ignoring", qid); @@ -1285,7 +1303,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) sq = n->sq[qid]; if (unlikely(new_tail >= sq->size)) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail, "submission queue doorbell write value" " beyond queue size, sqid=%"PRIu32"," " new_tail=%"PRIu16", ignoring", @@ -1342,37 +1360,42 @@ static const MemoryRegionOps nvme_cmb_ops = { }, }; -static void nvme_realize(PCIDevice *pci_dev, Error **errp) +static void nvme_check_constraints(NvmeCtrl *n, Error **errp) { - NvmeCtrl *n = NVME(pci_dev); - NvmeIdCtrl *id = &n->id_ctrl; + NvmeParams *params = &n->params; - int i; - int64_t bs_size; - uint8_t *pci_conf; + if (params->num_queues) { + warn_report("num_queues is deprecated; please use max_ioqpairs " + "instead"); - if (!n->num_queues) { - error_setg(errp, "num_queues can't be zero"); + params->max_ioqpairs = params->num_queues - 1; + } + + if (params->max_ioqpairs < 1 || + params->max_ioqpairs > NVME_MAX_IOQPAIRS) { + error_setg(errp, "max_ioqpairs must be between 1 and %d", + NVME_MAX_IOQPAIRS); return; } - if (!n->conf.blk) { - error_setg(errp, "drive property not set"); + if (params->msix_qsize < 1 || + params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) { + error_setg(errp, "msix_qsize must be between 1 and %d", + PCI_MSIX_FLAGS_QSIZE + 1); return; } - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg(errp, "could not get backing file size"); + if (!n->conf.blk) { + error_setg(errp, "drive property not set"); return; } - if (!n->serial) { + if (!params->serial) { error_setg(errp, "serial property not set"); return; } - if (!n->cmb_size_mb && n->pmrdev) { + if (!n->params.cmb_size_mb && n->pmrdev) { if (host_memory_backend_is_mapped(n->pmrdev)) { char *path = object_get_canonical_path_component(OBJECT(n->pmrdev)); error_setg(errp, "can't use already busy memdev: %s", path); @@ -1387,39 +1410,154 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) host_memory_backend_set_mapped(n->pmrdev, true); } +} + +static void nvme_init_state(NvmeCtrl *n) +{ + n->num_namespaces = 1; + /* add one to max_ioqpairs to account for the admin queue pair */ + n->reg_size = pow2ceil(NVME_REG_SIZE + + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); + n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); + n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); + n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); +} + +static void nvme_init_blk(NvmeCtrl *n, Error **errp) +{ + if (!blkconf_blocksizes(&n->conf, errp)) { + return; + } + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, errp); +} + +static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +{ + int64_t bs_size; + NvmeIdNs *id_ns = &ns->id_ns; - blkconf_blocksizes(&n->conf); - if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), - false, errp)) { + bs_size = blk_getlength(n->conf.blk); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, "could not get backing file size"); return; } - pci_conf = pci_dev->config; + n->ns_size = bs_size; + + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(n, ns)); + + /* no thin provisioning */ + id_ns->ncap = id_ns->nsze; + id_ns->nuse = id_ns->ncap; +} + +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR); + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); + 
NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); +} + +static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) +{ + /* Controller Capabilities register */ + NVME_CAP_SET_PMRS(n->bar.cap, 1); + + /* PMR Capabities register */ + n->bar.pmrcap = 0; + NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR); + NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); + NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); + + /* PMR Control register */ + n->bar.pmrctl = 0; + NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); + + /* PMR Status register */ + n->bar.pmrsts = 0; + NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); + + /* PMR Elasticity Buffer Size register */ + n->bar.pmrebs = 0; + NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); + NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); + NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); + + /* PMR Sustained Write Throughput register */ + n->bar.pmrswtp = 0; + NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); + NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); + + /* PMR Memory Space Control register */ + n->bar.pmrmsc = 0; + NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); + NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); + + pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); +} + +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) +{ + uint8_t *pci_conf = pci_dev->config; + pci_conf[PCI_INTERRUPT_PIN] = 1; - pci_config_set_prog_interface(pci_dev->config, 0x2); - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); + pci_config_set_prog_interface(pci_conf, 0x2); + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); pcie_endpoint_cap_init(pci_dev, 0x80); - n->num_namespaces = 1; - n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); - n->ns_size = bs_size / (uint64_t)n->num_namespaces; + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + n->reg_size); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); + if (msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp)) { + return; + } - n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); - n->sq = g_new0(NvmeSQueue *, n->num_queues); - n->cq = g_new0(NvmeCQueue *, n->num_queues); + if (n->params.cmb_size_mb) { + nvme_init_cmb(n, pci_dev); + } else if (n->pmrdev) { + nvme_init_pmr(n, pci_dev); + } +} - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, - "nvme", n->reg_size); - pci_register_bar(pci_dev, 0, - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, - &n->iomem); - msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL); +static void 
nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NvmeIdCtrl *id = &n->id_ctrl; + uint8_t *pci_conf = pci_dev->config; id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); id->rab = 6; id->ieee[0] = 0x00; id->ieee[1] = 0x02; @@ -1447,90 +1585,42 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; +} - if (n->cmb_size_mb) { - - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); - NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = NVME(pci_dev); + Error *local_err = NULL; - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + int i; - n->cmbloc = n->bar.cmbloc; - n->cmbsz = n->bar.cmbsz; + nvme_check_constraints(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + nvme_init_state(n); + nvme_init_blk(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } - } else if (n->pmrdev) { - /* Controller Capabilities register */ - NVME_CAP_SET_PMRS(n->bar.cap, 1); - - /* PMR Capabities register */ - n->bar.pmrcap = 0; - NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); - NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); - NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2); - NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); - /* Turn on bit 1 support */ - NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); - NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); - NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); - - /* PMR Control register */ - n->bar.pmrctl = 0; - NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); - - /* PMR Status register */ - n->bar.pmrsts = 0; - NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); - NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); - NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); - NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); - - /* PMR Elasticity Buffer Size register */ - n->bar.pmrebs = 0; - NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); - NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); - NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); - - /* PMR Sustained Write Throughput register */ - n->bar.pmrswtp = 0; - NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); - NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); - - /* PMR Memory Space Control register */ - n->bar.pmrmsc = 0; - NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); - NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); - - pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); + nvme_init_pci(n, pci_dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } + nvme_init_ctrl(n, pci_dev); + for (i = 0; i < n->num_namespaces; i++) { - 
NvmeNamespace *ns = &n->namespaces[i]; - NvmeIdNs *id_ns = &ns->id_ns; - id_ns->nsfeat = 0; - id_ns->nlbaf = 0; - id_ns->flbas = 0; - id_ns->mc = 0; - id_ns->dpc = 0; - id_ns->dps = 0; - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; - id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(n->ns_size >> - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); + nvme_init_namespace(n, &n->namespaces[i], &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } } @@ -1543,7 +1633,7 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->cq); g_free(n->sq); - if (n->cmb_size_mb) { + if (n->params.cmb_size_mb) { g_free(n->cmbuf); } @@ -1557,9 +1647,11 @@ static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND, HostMemoryBackend *), - DEFINE_PROP_STRING("serial", NvmeCtrl, serial), - DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), - DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), + DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial), + DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), + DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), + DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 6520a9f0be..1d30c0bca2 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -1,7 +1,16 @@ #ifndef HW_NVME_H #define HW_NVME_H + #include "block/nvme.h" +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; /* deprecated since 5.1 */ + uint32_t max_ioqpairs; + uint16_t msix_qsize; + uint32_t cmb_size_mb; +} NvmeParams; + typedef struct NvmeAsyncEvent { QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry; NvmeAerResult result; @@ -53,6 +62,17 @@ typedef struct NvmeNamespace { NvmeIdNs id_ns; } NvmeNamespace; +static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; +} + +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) +{ + return nvme_ns_lbaf(ns)->ds; +} + #define TYPE_NVME "nvme" #define NVME(obj) \ OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) @@ -63,6 +83,7 @@ typedef struct NvmeCtrl { MemoryRegion ctrl_mem; NvmeBar bar; BlockConf conf; + NvmeParams params; uint32_t page_size; uint16_t page_bits; @@ -71,18 +92,13 @@ typedef struct NvmeCtrl { uint16_t sqe_size; uint32_t reg_size; uint32_t num_namespaces; - uint32_t num_queues; uint32_t max_q_ents; uint64_t ns_size; - uint32_t cmb_size_mb; - uint32_t cmbsz; - uint32_t cmbloc; uint8_t *cmbuf; - uint64_t irq_status; + uint32_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - char *serial; HostMemoryBackend *pmrdev; NvmeNamespace *namespaces; @@ -93,4 +109,10 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; +/* calculate the number of LBAs that the namespace can accomodate */ +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) +{ + return n->ns_size >> nvme_ns_lbads(ns); +} + #endif /* HW_NVME_H */ diff --git a/hw/block/swim.c b/hw/block/swim.c index 8f124782f4..74f56e8f46 100644 --- a/hw/block/swim.c +++ b/hw/block/swim.c @@ -189,7 +189,10 @@ static void swim_drive_realize(DeviceState *qdev, Error **errp) assert(ret == 0); } - blkconf_blocksizes(&dev->conf); + if (!blkconf_blocksizes(&dev->conf, errp)) { + return; + } + if 
(dev->conf.logical_block_size != 512 || dev->conf.physical_block_size != 512) { diff --git a/hw/block/trace-events b/hw/block/trace-events index aca54bda14..958fcc5508 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -29,100 +29,100 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int t # nvme.c # nvme traces for successful events -nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" -nvme_irq_pin(void) "pulsing IRQ pin" -nvme_irq_masked(void) "IRQ is masked" -nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" -nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" -nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" -nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" -nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" -nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" -nvme_identify_ctrl(void) "identify controller" -nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" -nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" -nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" -nvme_getfeat_numq(int result) "get feature number of queues, result=%d" -nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" -nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" -nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" -nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" -nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" -nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -nvme_mmio_start_success(void) "setting controller enable bit succeeded" -nvme_mmio_stopped(void) "cleared controller enable bit" -nvme_mmio_shutdown_set(void) "shutdown bit set" -nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" +pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +pci_nvme_irq_pin(void) "pulsing IRQ pin" +pci_nvme_irq_masked(void) "IRQ is masked" +pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" +pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, 
uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" +pci_nvme_identify_ctrl(void) "identify controller" +pci_nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" +pci_nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" +pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" +pci_nvme_mmio_stopped(void) "cleared controller enable bit" +pci_nvme_mmio_shutdown_set(void) "shutdown bit set" +pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" # nvme traces for error conditions -nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" -nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" -nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" -nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" -nvme_err_invalid_prp(void) "invalid PRP" -nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" -nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" -nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" -nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" -nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" -nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" -nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" -nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" 
-nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" -nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" -nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" -nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" -nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" -nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" -nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" -nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" -nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" -nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" -nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" -nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" -nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" -nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" -nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" -nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" -nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" -nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" -nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" -nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" -nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" -nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" -nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" -nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" -nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" -nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" -nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" +pci_nvme_err_invalid_prp(void) "invalid 
PRP" +pci_nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" +pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" +pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" +pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" +pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" +pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry 
size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" +pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +pci_nvme_err_startfail(void) "setting controller enable bit failed" # Traces for undefined behavior -nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" -nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" -nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" -nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" -nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" -nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" -nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" -nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" -nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" -nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" -nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" -nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" -nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" -nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" -nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" -nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" -nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" -nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" -nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" -nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" -nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" +pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is 
zero, ignored" +pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" +pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" # xen-block.c xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f5f6fc925e..413783693c 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -819,15 +819,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_handle_output_do(s, vq); } -static void virtio_blk_dma_restart_bh(void *opaque) +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) { - VirtIOBlock *s = opaque; VirtIOBlockReq *req = s->rq; MultiReqBuffer mrb = {}; - qemu_bh_delete(s->bh); - s->bh = NULL; - s->rq = NULL; aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); @@ -851,22 +847,38 @@ static void virtio_blk_dma_restart_bh(void *opaque) if (mrb.num_reqs) { virtio_blk_submit_multireq(s->blk, &mrb); } - blk_dec_in_flight(s->conf.conf.blk); + if (is_bh) { + blk_dec_in_flight(s->conf.conf.blk); + } aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } +static void virtio_blk_dma_restart_bh(void *opaque) +{ + VirtIOBlock *s = opaque; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + virtio_blk_process_queued_requests(s, true); +} + static void virtio_blk_dma_restart_cb(void *opaque, int running, RunState state) { VirtIOBlock *s = opaque; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusState *bus = VIRTIO_BUS(qbus); if (!running) { return; } - if (!s->bh) { - /* FIXME The data plane is not started yet, so these requests are - * processed in the main thread. */ + /* + * If ioeventfd is enabled, don't schedule the BH here as queued + * requests will be processed while starting the data plane. 
+ */ + if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), virtio_blk_dma_restart_bh, s); blk_inc_in_flight(s->conf.conf.blk); @@ -918,7 +930,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls); virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size); - virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size); + virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size); blkcfg.geometry.heads = conf->heads; /* * We must ensure that the block device capacity is a multiple of @@ -1162,12 +1174,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - blkconf_blocksizes(&conf->conf); - - if (conf->conf.logical_block_size > - conf->conf.physical_block_size) { - error_setg(errp, - "logical_block_size > physical_block_size not supported"); + if (!blkconf_blocksizes(&conf->conf, errp)) { return; } diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 2827c90ac7..1b7bc5de08 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -239,11 +239,7 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) return; } - blkconf_blocksizes(conf); - - if (conf->logical_block_size > conf->physical_block_size) { - error_setg( - errp, "logical_block_size > physical_block_size not supported"); + if (!blkconf_blocksizes(conf, errp)) { return; } diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index cc924815da..ead35d7ffd 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -14,6 +14,7 @@ #include "qapi/visitor.h" #include "chardev/char.h" #include "qemu/uuid.h" +#include "qemu/units.h" void qdev_prop_set_after_realize(DeviceState *dev, const char *name, Error **errp) @@ -727,32 +728,95 @@ const PropertyInfo qdev_prop_pci_devfn = { .set_default_value = set_default_value_int, }; +/* --- 32bit unsigned int 'size' type --- */ + +static void get_size32(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + uint32_t *ptr = qdev_get_prop_ptr(dev, prop); + uint64_t value = *ptr; + + visit_type_size(v, name, &value, errp); +} + +static void set_size32(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + uint32_t *ptr = qdev_get_prop_ptr(dev, prop); + uint64_t value; + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_size(v, name, &value, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (value > UINT32_MAX) { + error_setg(errp, + "Property %s.%s doesn't take value %" PRIu64 + " (maximum: %u)", + dev->id ? 
: "", name, value, UINT32_MAX); + return; + } + + *ptr = value; +} + +const PropertyInfo qdev_prop_size32 = { + .name = "size", + .get = get_size32, + .set = set_size32, + .set_default_value = set_default_value_uint, +}; + /* --- blocksize --- */ +/* lower limit is sector size */ +#define MIN_BLOCK_SIZE 512 +#define MIN_BLOCK_SIZE_STR "512 B" +/* + * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and + * matches qcow2 cluster size limit + */ +#define MAX_BLOCK_SIZE (2 * MiB) +#define MAX_BLOCK_SIZE_STR "2 MiB" + static void set_blocksize(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { DeviceState *dev = DEVICE(obj); Property *prop = opaque; - uint16_t value, *ptr = qdev_get_prop_ptr(dev, prop); + uint32_t *ptr = qdev_get_prop_ptr(dev, prop); + uint64_t value; Error *local_err = NULL; - const int64_t min = 512; - const int64_t max = 32768; if (dev->realized) { qdev_prop_set_after_realize(dev, name, errp); return; } - visit_type_uint16(v, name, &value, &local_err); + visit_type_size(v, name, &value, &local_err); if (local_err) { error_propagate(errp, local_err); return; } /* value of 0 means "unset" */ - if (value && (value < min || value > max)) { - error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, - dev->id ? : "", name, (int64_t)value, min, max); + if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) { + error_setg(errp, + "Property %s.%s doesn't take value %" PRIu64 + " (minimum: " MIN_BLOCK_SIZE_STR + ", maximum: " MAX_BLOCK_SIZE_STR ")", + dev->id ? : "", name, value); return; } @@ -768,9 +832,10 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, } const PropertyInfo qdev_prop_blocksize = { - .name = "uint16", - .description = "A power of two between 512 and 32768", - .get = get_uint16, + .name = "size", + .description = "A power of two between " MIN_BLOCK_SIZE_STR + " and " MAX_BLOCK_SIZE_STR, + .get = get_size32, .set = set_blocksize, .set_default_value = set_default_value_uint, }; diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 937db10ae6..5e931975a0 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -170,42 +170,9 @@ static void microvm_memory_init(MicrovmMachineState *mms) MemoryRegion *ram_below_4g, *ram_above_4g; MemoryRegion *system_memory = get_system_memory(); FWCfgState *fw_cfg; - ram_addr_t lowmem; + ram_addr_t lowmem = 0xc0000000; /* 3G */ int i; - /* - * Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory - * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping - * also known as MMCFG). - * If it doesn't, we need to split it in chunks below and above 4G. - * In any case, try to make sure that guest addresses aligned at - * 1G boundaries get mapped to host addresses aligned at 1G boundaries. - */ - if (machine->ram_size >= 0xb0000000) { - lowmem = 0x80000000; - } else { - lowmem = 0xb0000000; - } - - /* - * Handle the machine opt max-ram-below-4g. It is basically doing - * min(qemu limit, user limit). 
- */ - if (!x86ms->max_ram_below_4g) { - x86ms->max_ram_below_4g = 4 * GiB; - } - if (lowmem > x86ms->max_ram_below_4g) { - lowmem = x86ms->max_ram_below_4g; - if (machine->ram_size - lowmem > lowmem && - lowmem & (1 * GiB - 1)) { - warn_report("There is possibly poor performance as the ram size " - " (0x%" PRIx64 ") is more then twice the size of" - " max-ram-below-4g (%"PRIu64") and" - " max-ram-below-4g is not a multiple of 1G.", - (uint64_t)machine->ram_size, x86ms->max_ram_below_4g); - } - } - if (machine->ram_size > lowmem) { x86ms->above_4g_mem_size = machine->ram_size - lowmem; x86ms->below_4g_mem_size = lowmem; diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ec39741c87..d103b8c0ab 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1857,6 +1857,45 @@ static void pc_machine_set_pit(Object *obj, bool value, Error **errp) pcms->pit_enabled = value; } +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + uint64_t value = pcms->max_ram_below_4g; + + visit_type_size(v, name, &value, errp); +} + +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + Error *error = NULL; + uint64_t value; + + visit_type_size(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + if (value > 4 * GiB) { + error_setg(&error, + "Machine option 'max-ram-below-4g=%"PRIu64 + "' expects size less than or equal to 4G", value); + error_propagate(errp, error); + return; + } + + if (value < 1 * MiB) { + warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," + "BIOS may not work with less than 1MiB", value); + } + + pcms->max_ram_below_4g = value; +} + static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); @@ -1866,6 +1905,7 @@ static void pc_machine_initfn(Object *obj) #else pcms->vmport = ON_OFF_AUTO_OFF; #endif /* CONFIG_VMPORT */ + pcms->max_ram_below_4g = 0; /* use default */ /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; pcms->smbus_enabled = true; @@ -1964,6 +2004,12 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->numa_mem_supported = true; mc->default_ram_id = "pc.ram"; + object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", + pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, + NULL, NULL); + object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, + "Maximum ram below the 4G boundary (32bit boundary)"); + object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int", pc_machine_get_device_memory_region_size, NULL, NULL, NULL); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 054d3aa9f7..1497d0e4ae 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -129,11 +129,11 @@ static void pc_init1(MachineState *machine, if (xen_enabled()) { xen_hvm_init(pcms, &ram_memory); } else { - if (!x86ms->max_ram_below_4g) { - x86ms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ + if (!pcms->max_ram_below_4g) { + pcms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ } - lowmem = x86ms->max_ram_below_4g; - if (machine->ram_size >= x86ms->max_ram_below_4g) { + lowmem = pcms->max_ram_below_4g; + if (machine->ram_size >= pcms->max_ram_below_4g) { if (pcmc->gigabyte_align) { if (lowmem > 0xc0000000) { lowmem = 0xc0000000; @@ -142,7 +142,7 @@ static void 
pc_init1(MachineState *machine, warn_report("Large machine and max_ram_below_4g " "(%" PRIu64 ") not a multiple of 1G; " "possible bad performance.", - x86ms->max_ram_below_4g); + pcms->max_ram_below_4g); } } } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 18d2fad9c5..46cd06524c 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -156,18 +156,18 @@ static void pc_q35_init(MachineState *machine) /* Handle the machine opt max-ram-below-4g. It is basically doing * min(qemu limit, user limit). */ - if (!x86ms->max_ram_below_4g) { - x86ms->max_ram_below_4g = 4 * GiB; + if (!pcms->max_ram_below_4g) { + pcms->max_ram_below_4g = 4 * GiB; } - if (lowmem > x86ms->max_ram_below_4g) { - lowmem = x86ms->max_ram_below_4g; + if (lowmem > pcms->max_ram_below_4g) { + lowmem = pcms->max_ram_below_4g; if (machine->ram_size - lowmem > lowmem && lowmem & (1 * GiB - 1)) { warn_report("There is possibly poor performance as the ram size " " (0x%" PRIx64 ") is more then twice the size of" " max-ram-below-4g (%"PRIu64") and" " max-ram-below-4g is not a multiple of 1G.", - (uint64_t)machine->ram_size, x86ms->max_ram_below_4g); + (uint64_t)machine->ram_size, pcms->max_ram_below_4g); } } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index bb31935f70..34229b45c7 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -846,45 +846,6 @@ void x86_bios_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) bios); } -static void x86_machine_get_max_ram_below_4g(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - X86MachineState *x86ms = X86_MACHINE(obj); - uint64_t value = x86ms->max_ram_below_4g; - - visit_type_size(v, name, &value, errp); -} - -static void x86_machine_set_max_ram_below_4g(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - X86MachineState *x86ms = X86_MACHINE(obj); - Error *error = NULL; - uint64_t value; - - visit_type_size(v, name, &value, &error); - if (error) { - error_propagate(errp, error); - return; - } - if (value > 4 * GiB) { - error_setg(&error, - "Machine option 'max-ram-below-4g=%"PRIu64 - "' expects size less than or equal to 4G", value); - error_propagate(errp, error); - return; - } - - if (value < 1 * MiB) { - warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," - "BIOS may not work with less than 1MiB", value); - } - - x86ms->max_ram_below_4g = value; -} - bool x86_machine_is_smm_enabled(X86MachineState *x86ms) { bool smm_available = false; @@ -958,7 +919,6 @@ static void x86_machine_initfn(Object *obj) x86ms->smm = ON_OFF_AUTO_AUTO; x86ms->acpi = ON_OFF_AUTO_AUTO; - x86ms->max_ram_below_4g = 0; /* use default */ x86ms->smp_dies = 1; x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx; @@ -980,12 +940,6 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) x86mc->save_tsc_khz = true; nc->nmi_monitor_handler = x86_nmi; - object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size", - x86_machine_get_max_ram_below_4g, x86_machine_set_max_ram_below_4g, - NULL, NULL); - object_class_property_set_description(oc, X86_MACHINE_MAX_RAM_BELOW_4G, - "Maximum ram below the 4G boundary (32bit boundary)"); - object_class_property_add(oc, X86_MACHINE_SMM, "OnOffAuto", x86_machine_get_smm, x86_machine_set_smm, NULL, NULL); diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 628bde5fac..cde981bad6 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -205,7 +205,7 @@ static void xen_ram_init(PCMachineState *pcms, ram_addr_t block_len; uint64_t user_lowmem = 
object_property_get_uint(qdev_get_machine(), - X86_MACHINE_MAX_RAM_BELOW_4G, + PC_MACHINE_MAX_RAM_BELOW_4G, &error_abort); /* Handle the machine opt max-ram-below-4g. It is basically doing diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index fc82cbd5f1..009120f88b 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -44,7 +44,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot); static void ahci_reset_port(AHCIState *s, int port); static bool ahci_write_fis_d2h(AHCIDevice *ad); static void ahci_init_d2h(AHCIDevice *ad); -static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit); +static int ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit); static bool ahci_map_clb_address(AHCIDevice *ad); static bool ahci_map_fis_address(AHCIDevice *ad); static void ahci_unmap_clb_address(AHCIDevice *ad); @@ -1338,7 +1338,7 @@ out: } /* Transfer PIO data between RAM and device */ -static void ahci_pio_transfer(IDEDMA *dma) +static void ahci_pio_transfer(const IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; @@ -1397,7 +1397,7 @@ out: } } -static void ahci_start_dma(IDEDMA *dma, IDEState *s, +static void ahci_start_dma(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *dma_cb) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1406,7 +1406,7 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s, dma_cb(s, 0); } -static void ahci_restart_dma(IDEDMA *dma) +static void ahci_restart_dma(const IDEDMA *dma) { /* Nothing to do, ahci_start_dma already resets s->io_buffer_offset. */ } @@ -1415,7 +1415,7 @@ static void ahci_restart_dma(IDEDMA *dma) * IDE/PIO restarts are handled by the core layer, but NCQ commands * need an extra kick from the AHCI HBA. */ -static void ahci_restart(IDEDMA *dma) +static void ahci_restart(const IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); int i; @@ -1432,7 +1432,7 @@ static void ahci_restart(IDEDMA *dma) * Called in DMA and PIO R/W chains to read the PRDT. * Not shared with NCQ pathways. */ -static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit) +static int32_t ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; @@ -1453,7 +1453,7 @@ static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit) * Called via dma_buf_commit, for both DMA and PIO paths. * sglist destruction is handled within dma_buf_commit. 
*/ -static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) +static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1461,7 +1461,7 @@ static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) ad->cur_cmd->status = cpu_to_le32(tx_bytes); } -static int ahci_dma_rw_buf(IDEDMA *dma, bool is_write) +static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; @@ -1486,7 +1486,7 @@ static int ahci_dma_rw_buf(IDEDMA *dma, bool is_write) return 1; } -static void ahci_cmd_done(IDEDMA *dma) +static void ahci_cmd_done(const IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); diff --git a/hw/ide/core.c b/hw/ide/core.c index 689bb36409..d997a78e47 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2570,16 +2570,16 @@ static void ide_init1(IDEBus *bus, int unit) ide_sector_write_timer_cb, s); } -static int ide_nop_int(IDEDMA *dma, bool is_write) +static int ide_nop_int(const IDEDMA *dma, bool is_write) { return 0; } -static void ide_nop(IDEDMA *dma) +static void ide_nop(const IDEDMA *dma) { } -static int32_t ide_nop_int32(IDEDMA *dma, int32_t l) +static int32_t ide_nop_int32(const IDEDMA *dma, int32_t l) { return 0; } diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 30af0e93e6..62a599a075 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -376,17 +376,17 @@ static void macio_ide_reset(DeviceState *dev) ide_bus_reset(&d->bus); } -static int ide_nop_int(IDEDMA *dma, bool is_write) +static int ide_nop_int(const IDEDMA *dma, bool is_write) { return 0; } -static int32_t ide_nop_int32(IDEDMA *dma, int32_t l) +static int32_t ide_nop_int32(const IDEDMA *dma, int32_t l) { return 0; } -static void ide_dbdma_start(IDEDMA *dma, IDEState *s, +static void ide_dbdma_start(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *cb) { MACIOIDEState *m = container_of(dma, MACIOIDEState, dma); diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 97347f07f1..5e85c4ad17 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -103,7 +103,7 @@ const MemoryRegionOps pci_ide_data_le_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void bmdma_start_dma(IDEDMA *dma, IDEState *s, +static void bmdma_start_dma(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *dma_cb) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); @@ -126,7 +126,7 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s, * IDEState.io_buffer_size will contain the number of bytes described * by the PRDs, whether or not we added them to the sglist. 
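Every IDE DMA callback in the hunks above now takes a const IDEDMA *, so providers can describe their hooks in a fully const ops table. A sketch of the resulting callback-table shape — the member names follow the functions touched here, but the exact layout of IDEDMAOps in hw/ide/internal.h should be treated as an assumption:

struct IDEDMAOps {
    void    (*start_dma)(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *cb);
    int32_t (*prepare_buf)(const IDEDMA *dma, int32_t limit);
    void    (*commit_buf)(const IDEDMA *dma, uint32_t tx_bytes);
    int     (*rw_buf)(const IDEDMA *dma, bool is_write);
    void    (*restart)(const IDEDMA *dma);
    void    (*restart_dma)(const IDEDMA *dma);
    void    (*set_inactive)(const IDEDMA *dma, bool more);
    void    (*cmd_done)(const IDEDMA *dma);
    void    (*reset)(const IDEDMA *dma);
};

With const-qualified parameters throughout, instance tables such as ahci_dma_ops can themselves be declared const and live in read-only data.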
*/ -static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit) +static int32_t bmdma_prepare_buf(const IDEDMA *dma, int32_t limit) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); IDEState *s = bmdma_active_if(bm); @@ -181,7 +181,7 @@ static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit) } /* return 0 if buffer completed */ -static int bmdma_rw_buf(IDEDMA *dma, bool is_write) +static int bmdma_rw_buf(const IDEDMA *dma, bool is_write) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); IDEState *s = bmdma_active_if(bm); @@ -230,7 +230,7 @@ static int bmdma_rw_buf(IDEDMA *dma, bool is_write) return 1; } -static void bmdma_set_inactive(IDEDMA *dma, bool more) +static void bmdma_set_inactive(const IDEDMA *dma, bool more) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); @@ -242,7 +242,7 @@ static void bmdma_set_inactive(IDEDMA *dma, bool more) } } -static void bmdma_restart_dma(IDEDMA *dma) +static void bmdma_restart_dma(const IDEDMA *dma) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); @@ -257,7 +257,7 @@ static void bmdma_cancel(BMDMAState *bm) } } -static void bmdma_reset(IDEDMA *dma) +static void bmdma_reset(const IDEDMA *dma) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index caa88526f5..3ccb5e2529 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -187,7 +187,10 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp) return; } - blkconf_blocksizes(&dev->conf); + if (!blkconf_blocksizes(&dev->conf, errp)) { + return; + } + if (dev->conf.logical_block_size != 512) { error_setg(errp, "logical_block_size must be 512 for IDE"); return; diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index f2b73983ee..7ccbf72ea7 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -41,6 +41,7 @@ obj-$(CONFIG_MILKYMIST) += milkymist-minimac2.o obj-$(CONFIG_PSERIES) += spapr_llan.o obj-$(CONFIG_XILINX_ETHLITE) += xilinx_ethlite.o +common-obj-$(CONFIG_VIRTIO_NET) += net_rx_pkt.o obj-$(CONFIG_VIRTIO_NET) += virtio-net.o common-obj-$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET)) += vhost_net.o common-obj-$(call lnot,$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET))) += vhost_net-stub.o diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index e8f9cc7f1e..a93b5c07ce 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -34,120 +34,123 @@ #include "qemu/module.h" #include "sysemu/dma.h" #include "net/checksum.h" - -#ifdef CADENCE_GEM_ERR_DEBUG -#define DB_PRINT(...) do { \ - fprintf(stderr, ": %s: ", __func__); \ - fprintf(stderr, ## __VA_ARGS__); \ - } while (0) -#else - #define DB_PRINT(...) 
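The hw/ide/qdev.c hunk above shows the new contract for blkconf_blocksizes(): it validates the sizes itself, reports through Error **errp, and returns false on failure — which is presumably why virtio-blk and xen-block can drop their own "logical > physical" checks in the earlier hunks. A minimal sketch of a realize function following that pattern, with the device type hypothetical:

typedef struct MyDriveState {                 /* hypothetical */
    DeviceState parent_obj;
    BlockConf conf;
} MyDriveState;

static void my_drive_realize(DeviceState *dev, Error **errp)
{
    MyDriveState *s = (MyDriveState *)dev;    /* stand-in for an OBJECT_CHECK cast */

    if (!blkconf_blocksizes(&s->conf, errp)) {
        return;                               /* errp already populated */
    }
    if (s->conf.logical_block_size != 512) {  /* any device-specific rule */
        error_setg(errp, "logical_block_size must be 512 for this device");
        return;
    }
    /* ... rest of realize ... */
}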
-#endif - -#define GEM_NWCTRL (0x00000000/4) /* Network Control reg */ -#define GEM_NWCFG (0x00000004/4) /* Network Config reg */ -#define GEM_NWSTATUS (0x00000008/4) /* Network Status reg */ -#define GEM_USERIO (0x0000000C/4) /* User IO reg */ -#define GEM_DMACFG (0x00000010/4) /* DMA Control reg */ -#define GEM_TXSTATUS (0x00000014/4) /* TX Status reg */ -#define GEM_RXQBASE (0x00000018/4) /* RX Q Base address reg */ -#define GEM_TXQBASE (0x0000001C/4) /* TX Q Base address reg */ -#define GEM_RXSTATUS (0x00000020/4) /* RX Status reg */ -#define GEM_ISR (0x00000024/4) /* Interrupt Status reg */ -#define GEM_IER (0x00000028/4) /* Interrupt Enable reg */ -#define GEM_IDR (0x0000002C/4) /* Interrupt Disable reg */ -#define GEM_IMR (0x00000030/4) /* Interrupt Mask reg */ -#define GEM_PHYMNTNC (0x00000034/4) /* Phy Maintenance reg */ -#define GEM_RXPAUSE (0x00000038/4) /* RX Pause Time reg */ -#define GEM_TXPAUSE (0x0000003C/4) /* TX Pause Time reg */ -#define GEM_TXPARTIALSF (0x00000040/4) /* TX Partial Store and Forward */ -#define GEM_RXPARTIALSF (0x00000044/4) /* RX Partial Store and Forward */ -#define GEM_HASHLO (0x00000080/4) /* Hash Low address reg */ -#define GEM_HASHHI (0x00000084/4) /* Hash High address reg */ -#define GEM_SPADDR1LO (0x00000088/4) /* Specific addr 1 low reg */ -#define GEM_SPADDR1HI (0x0000008C/4) /* Specific addr 1 high reg */ -#define GEM_SPADDR2LO (0x00000090/4) /* Specific addr 2 low reg */ -#define GEM_SPADDR2HI (0x00000094/4) /* Specific addr 2 high reg */ -#define GEM_SPADDR3LO (0x00000098/4) /* Specific addr 3 low reg */ -#define GEM_SPADDR3HI (0x0000009C/4) /* Specific addr 3 high reg */ -#define GEM_SPADDR4LO (0x000000A0/4) /* Specific addr 4 low reg */ -#define GEM_SPADDR4HI (0x000000A4/4) /* Specific addr 4 high reg */ -#define GEM_TIDMATCH1 (0x000000A8/4) /* Type ID1 Match reg */ -#define GEM_TIDMATCH2 (0x000000AC/4) /* Type ID2 Match reg */ -#define GEM_TIDMATCH3 (0x000000B0/4) /* Type ID3 Match reg */ -#define GEM_TIDMATCH4 (0x000000B4/4) /* Type ID4 Match reg */ -#define GEM_WOLAN (0x000000B8/4) /* Wake on LAN reg */ -#define GEM_IPGSTRETCH (0x000000BC/4) /* IPG Stretch reg */ -#define GEM_SVLAN (0x000000C0/4) /* Stacked VLAN reg */ -#define GEM_MODID (0x000000FC/4) /* Module ID reg */ -#define GEM_OCTTXLO (0x00000100/4) /* Octects transmitted Low reg */ -#define GEM_OCTTXHI (0x00000104/4) /* Octects transmitted High reg */ -#define GEM_TXCNT (0x00000108/4) /* Error-free Frames transmitted */ -#define GEM_TXBCNT (0x0000010C/4) /* Error-free Broadcast Frames */ -#define GEM_TXMCNT (0x00000110/4) /* Error-free Multicast Frame */ -#define GEM_TXPAUSECNT (0x00000114/4) /* Pause Frames Transmitted */ -#define GEM_TX64CNT (0x00000118/4) /* Error-free 64 TX */ -#define GEM_TX65CNT (0x0000011C/4) /* Error-free 65-127 TX */ -#define GEM_TX128CNT (0x00000120/4) /* Error-free 128-255 TX */ -#define GEM_TX256CNT (0x00000124/4) /* Error-free 256-511 */ -#define GEM_TX512CNT (0x00000128/4) /* Error-free 512-1023 TX */ -#define GEM_TX1024CNT (0x0000012C/4) /* Error-free 1024-1518 TX */ -#define GEM_TX1519CNT (0x00000130/4) /* Error-free larger than 1519 TX */ -#define GEM_TXURUNCNT (0x00000134/4) /* TX under run error counter */ -#define GEM_SINGLECOLLCNT (0x00000138/4) /* Single Collision Frames */ -#define GEM_MULTCOLLCNT (0x0000013C/4) /* Multiple Collision Frames */ -#define GEM_EXCESSCOLLCNT (0x00000140/4) /* Excessive Collision Frames */ -#define GEM_LATECOLLCNT (0x00000144/4) /* Late Collision Frames */ -#define GEM_DEFERTXCNT (0x00000148/4) /* Deferred 
Transmission Frames */ -#define GEM_CSENSECNT (0x0000014C/4) /* Carrier Sense Error Counter */ -#define GEM_OCTRXLO (0x00000150/4) /* Octects Received register Low */ -#define GEM_OCTRXHI (0x00000154/4) /* Octects Received register High */ -#define GEM_RXCNT (0x00000158/4) /* Error-free Frames Received */ -#define GEM_RXBROADCNT (0x0000015C/4) /* Error-free Broadcast Frames RX */ -#define GEM_RXMULTICNT (0x00000160/4) /* Error-free Multicast Frames RX */ -#define GEM_RXPAUSECNT (0x00000164/4) /* Pause Frames Received Counter */ -#define GEM_RX64CNT (0x00000168/4) /* Error-free 64 byte Frames RX */ -#define GEM_RX65CNT (0x0000016C/4) /* Error-free 65-127B Frames RX */ -#define GEM_RX128CNT (0x00000170/4) /* Error-free 128-255B Frames RX */ -#define GEM_RX256CNT (0x00000174/4) /* Error-free 256-512B Frames RX */ -#define GEM_RX512CNT (0x00000178/4) /* Error-free 512-1023B Frames RX */ -#define GEM_RX1024CNT (0x0000017C/4) /* Error-free 1024-1518B Frames RX */ -#define GEM_RX1519CNT (0x00000180/4) /* Error-free 1519-max Frames RX */ -#define GEM_RXUNDERCNT (0x00000184/4) /* Undersize Frames Received */ -#define GEM_RXOVERCNT (0x00000188/4) /* Oversize Frames Received */ -#define GEM_RXJABCNT (0x0000018C/4) /* Jabbers Received Counter */ -#define GEM_RXFCSCNT (0x00000190/4) /* Frame Check seq. Error Counter */ -#define GEM_RXLENERRCNT (0x00000194/4) /* Length Field Error Counter */ -#define GEM_RXSYMERRCNT (0x00000198/4) /* Symbol Error Counter */ -#define GEM_RXALIGNERRCNT (0x0000019C/4) /* Alignment Error Counter */ -#define GEM_RXRSCERRCNT (0x000001A0/4) /* Receive Resource Error Counter */ -#define GEM_RXORUNCNT (0x000001A4/4) /* Receive Overrun Counter */ -#define GEM_RXIPCSERRCNT (0x000001A8/4) /* IP header Checksum Error Counter */ -#define GEM_RXTCPCCNT (0x000001AC/4) /* TCP Checksum Error Counter */ -#define GEM_RXUDPCCNT (0x000001B0/4) /* UDP Checksum Error Counter */ - -#define GEM_1588S (0x000001D0/4) /* 1588 Timer Seconds */ -#define GEM_1588NS (0x000001D4/4) /* 1588 Timer Nanoseconds */ -#define GEM_1588ADJ (0x000001D8/4) /* 1588 Timer Adjust */ -#define GEM_1588INC (0x000001DC/4) /* 1588 Timer Increment */ -#define GEM_PTPETXS (0x000001E0/4) /* PTP Event Frame Transmitted (s) */ -#define GEM_PTPETXNS (0x000001E4/4) /* PTP Event Frame Transmitted (ns) */ -#define GEM_PTPERXS (0x000001E8/4) /* PTP Event Frame Received (s) */ -#define GEM_PTPERXNS (0x000001EC/4) /* PTP Event Frame Received (ns) */ -#define GEM_PTPPTXS (0x000001E0/4) /* PTP Peer Frame Transmitted (s) */ -#define GEM_PTPPTXNS (0x000001E4/4) /* PTP Peer Frame Transmitted (ns) */ -#define GEM_PTPPRXS (0x000001E8/4) /* PTP Peer Frame Received (s) */ -#define GEM_PTPPRXNS (0x000001EC/4) /* PTP Peer Frame Received (ns) */ +#include "net/eth.h" + +#define CADENCE_GEM_ERR_DEBUG 0 +#define DB_PRINT(...) 
do {\ + if (CADENCE_GEM_ERR_DEBUG) { \ + qemu_log(": %s: ", __func__); \ + qemu_log(__VA_ARGS__); \ + } \ +} while (0) + +#define GEM_NWCTRL (0x00000000 / 4) /* Network Control reg */ +#define GEM_NWCFG (0x00000004 / 4) /* Network Config reg */ +#define GEM_NWSTATUS (0x00000008 / 4) /* Network Status reg */ +#define GEM_USERIO (0x0000000C / 4) /* User IO reg */ +#define GEM_DMACFG (0x00000010 / 4) /* DMA Control reg */ +#define GEM_TXSTATUS (0x00000014 / 4) /* TX Status reg */ +#define GEM_RXQBASE (0x00000018 / 4) /* RX Q Base address reg */ +#define GEM_TXQBASE (0x0000001C / 4) /* TX Q Base address reg */ +#define GEM_RXSTATUS (0x00000020 / 4) /* RX Status reg */ +#define GEM_ISR (0x00000024 / 4) /* Interrupt Status reg */ +#define GEM_IER (0x00000028 / 4) /* Interrupt Enable reg */ +#define GEM_IDR (0x0000002C / 4) /* Interrupt Disable reg */ +#define GEM_IMR (0x00000030 / 4) /* Interrupt Mask reg */ +#define GEM_PHYMNTNC (0x00000034 / 4) /* Phy Maintenance reg */ +#define GEM_RXPAUSE (0x00000038 / 4) /* RX Pause Time reg */ +#define GEM_TXPAUSE (0x0000003C / 4) /* TX Pause Time reg */ +#define GEM_TXPARTIALSF (0x00000040 / 4) /* TX Partial Store and Forward */ +#define GEM_RXPARTIALSF (0x00000044 / 4) /* RX Partial Store and Forward */ +#define GEM_JUMBO_MAX_LEN (0x00000048 / 4) /* Max Jumbo Frame Size */ +#define GEM_HASHLO (0x00000080 / 4) /* Hash Low address reg */ +#define GEM_HASHHI (0x00000084 / 4) /* Hash High address reg */ +#define GEM_SPADDR1LO (0x00000088 / 4) /* Specific addr 1 low reg */ +#define GEM_SPADDR1HI (0x0000008C / 4) /* Specific addr 1 high reg */ +#define GEM_SPADDR2LO (0x00000090 / 4) /* Specific addr 2 low reg */ +#define GEM_SPADDR2HI (0x00000094 / 4) /* Specific addr 2 high reg */ +#define GEM_SPADDR3LO (0x00000098 / 4) /* Specific addr 3 low reg */ +#define GEM_SPADDR3HI (0x0000009C / 4) /* Specific addr 3 high reg */ +#define GEM_SPADDR4LO (0x000000A0 / 4) /* Specific addr 4 low reg */ +#define GEM_SPADDR4HI (0x000000A4 / 4) /* Specific addr 4 high reg */ +#define GEM_TIDMATCH1 (0x000000A8 / 4) /* Type ID1 Match reg */ +#define GEM_TIDMATCH2 (0x000000AC / 4) /* Type ID2 Match reg */ +#define GEM_TIDMATCH3 (0x000000B0 / 4) /* Type ID3 Match reg */ +#define GEM_TIDMATCH4 (0x000000B4 / 4) /* Type ID4 Match reg */ +#define GEM_WOLAN (0x000000B8 / 4) /* Wake on LAN reg */ +#define GEM_IPGSTRETCH (0x000000BC / 4) /* IPG Stretch reg */ +#define GEM_SVLAN (0x000000C0 / 4) /* Stacked VLAN reg */ +#define GEM_MODID (0x000000FC / 4) /* Module ID reg */ +#define GEM_OCTTXLO (0x00000100 / 4) /* Octects transmitted Low reg */ +#define GEM_OCTTXHI (0x00000104 / 4) /* Octects transmitted High reg */ +#define GEM_TXCNT (0x00000108 / 4) /* Error-free Frames transmitted */ +#define GEM_TXBCNT (0x0000010C / 4) /* Error-free Broadcast Frames */ +#define GEM_TXMCNT (0x00000110 / 4) /* Error-free Multicast Frame */ +#define GEM_TXPAUSECNT (0x00000114 / 4) /* Pause Frames Transmitted */ +#define GEM_TX64CNT (0x00000118 / 4) /* Error-free 64 TX */ +#define GEM_TX65CNT (0x0000011C / 4) /* Error-free 65-127 TX */ +#define GEM_TX128CNT (0x00000120 / 4) /* Error-free 128-255 TX */ +#define GEM_TX256CNT (0x00000124 / 4) /* Error-free 256-511 */ +#define GEM_TX512CNT (0x00000128 / 4) /* Error-free 512-1023 TX */ +#define GEM_TX1024CNT (0x0000012C / 4) /* Error-free 1024-1518 TX */ +#define GEM_TX1519CNT (0x00000130 / 4) /* Error-free larger than 1519 TX */ +#define GEM_TXURUNCNT (0x00000134 / 4) /* TX under run error counter */ +#define GEM_SINGLECOLLCNT (0x00000138 / 4) /* Single 
Collision Frames */ +#define GEM_MULTCOLLCNT (0x0000013C / 4) /* Multiple Collision Frames */ +#define GEM_EXCESSCOLLCNT (0x00000140 / 4) /* Excessive Collision Frames */ +#define GEM_LATECOLLCNT (0x00000144 / 4) /* Late Collision Frames */ +#define GEM_DEFERTXCNT (0x00000148 / 4) /* Deferred Transmission Frames */ +#define GEM_CSENSECNT (0x0000014C / 4) /* Carrier Sense Error Counter */ +#define GEM_OCTRXLO (0x00000150 / 4) /* Octects Received register Low */ +#define GEM_OCTRXHI (0x00000154 / 4) /* Octects Received register High */ +#define GEM_RXCNT (0x00000158 / 4) /* Error-free Frames Received */ +#define GEM_RXBROADCNT (0x0000015C / 4) /* Error-free Broadcast Frames RX */ +#define GEM_RXMULTICNT (0x00000160 / 4) /* Error-free Multicast Frames RX */ +#define GEM_RXPAUSECNT (0x00000164 / 4) /* Pause Frames Received Counter */ +#define GEM_RX64CNT (0x00000168 / 4) /* Error-free 64 byte Frames RX */ +#define GEM_RX65CNT (0x0000016C / 4) /* Error-free 65-127B Frames RX */ +#define GEM_RX128CNT (0x00000170 / 4) /* Error-free 128-255B Frames RX */ +#define GEM_RX256CNT (0x00000174 / 4) /* Error-free 256-512B Frames RX */ +#define GEM_RX512CNT (0x00000178 / 4) /* Error-free 512-1023B Frames RX */ +#define GEM_RX1024CNT (0x0000017C / 4) /* Error-free 1024-1518B Frames RX */ +#define GEM_RX1519CNT (0x00000180 / 4) /* Error-free 1519-max Frames RX */ +#define GEM_RXUNDERCNT (0x00000184 / 4) /* Undersize Frames Received */ +#define GEM_RXOVERCNT (0x00000188 / 4) /* Oversize Frames Received */ +#define GEM_RXJABCNT (0x0000018C / 4) /* Jabbers Received Counter */ +#define GEM_RXFCSCNT (0x00000190 / 4) /* Frame Check seq. Error Counter */ +#define GEM_RXLENERRCNT (0x00000194 / 4) /* Length Field Error Counter */ +#define GEM_RXSYMERRCNT (0x00000198 / 4) /* Symbol Error Counter */ +#define GEM_RXALIGNERRCNT (0x0000019C / 4) /* Alignment Error Counter */ +#define GEM_RXRSCERRCNT (0x000001A0 / 4) /* Receive Resource Error Counter */ +#define GEM_RXORUNCNT (0x000001A4 / 4) /* Receive Overrun Counter */ +#define GEM_RXIPCSERRCNT (0x000001A8 / 4) /* IP header Checksum Err Counter */ +#define GEM_RXTCPCCNT (0x000001AC / 4) /* TCP Checksum Error Counter */ +#define GEM_RXUDPCCNT (0x000001B0 / 4) /* UDP Checksum Error Counter */ + +#define GEM_1588S (0x000001D0 / 4) /* 1588 Timer Seconds */ +#define GEM_1588NS (0x000001D4 / 4) /* 1588 Timer Nanoseconds */ +#define GEM_1588ADJ (0x000001D8 / 4) /* 1588 Timer Adjust */ +#define GEM_1588INC (0x000001DC / 4) /* 1588 Timer Increment */ +#define GEM_PTPETXS (0x000001E0 / 4) /* PTP Event Frame Transmitted (s) */ +#define GEM_PTPETXNS (0x000001E4 / 4) /* + * PTP Event Frame Transmitted (ns) + */ +#define GEM_PTPERXS (0x000001E8 / 4) /* PTP Event Frame Received (s) */ +#define GEM_PTPERXNS (0x000001EC / 4) /* PTP Event Frame Received (ns) */ +#define GEM_PTPPTXS (0x000001E0 / 4) /* PTP Peer Frame Transmitted (s) */ +#define GEM_PTPPTXNS (0x000001E4 / 4) /* PTP Peer Frame Transmitted (ns) */ +#define GEM_PTPPRXS (0x000001E8 / 4) /* PTP Peer Frame Received (s) */ +#define GEM_PTPPRXNS (0x000001EC / 4) /* PTP Peer Frame Received (ns) */ /* Design Configuration Registers */ -#define GEM_DESCONF (0x00000280/4) -#define GEM_DESCONF2 (0x00000284/4) -#define GEM_DESCONF3 (0x00000288/4) -#define GEM_DESCONF4 (0x0000028C/4) -#define GEM_DESCONF5 (0x00000290/4) -#define GEM_DESCONF6 (0x00000294/4) +#define GEM_DESCONF (0x00000280 / 4) +#define GEM_DESCONF2 (0x00000284 / 4) +#define GEM_DESCONF3 (0x00000288 / 4) +#define GEM_DESCONF4 (0x0000028C / 4) +#define GEM_DESCONF5 
(0x00000290 / 4) +#define GEM_DESCONF6 (0x00000294 / 4) #define GEM_DESCONF6_64B_MASK (1U << 23) -#define GEM_DESCONF7 (0x00000298/4) +#define GEM_DESCONF7 (0x00000298 / 4) #define GEM_INT_Q1_STATUS (0x00000400 / 4) #define GEM_INT_Q1_MASK (0x00000640 / 4) @@ -211,10 +214,12 @@ #define GEM_NWCFG_LERR_DISC 0x00010000 /* Discard RX frames with len err */ #define GEM_NWCFG_BUFF_OFST_M 0x0000C000 /* Receive buffer offset mask */ #define GEM_NWCFG_BUFF_OFST_S 14 /* Receive buffer offset shift */ +#define GEM_NWCFG_RCV_1538 0x00000100 /* Receive 1538 bytes frame */ #define GEM_NWCFG_UCAST_HASH 0x00000080 /* accept unicast if hash match */ #define GEM_NWCFG_MCAST_HASH 0x00000040 /* accept multicast if hash match */ #define GEM_NWCFG_BCAST_REJ 0x00000020 /* Reject broadcast packets */ #define GEM_NWCFG_PROMISC 0x00000010 /* Accept all packets */ +#define GEM_NWCFG_JUMBO_FRAME 0x00000008 /* Jumbo Frames enable */ #define GEM_DMACFG_ADDR_64B (1U << 30) #define GEM_DMACFG_TX_BD_EXT (1U << 29) @@ -232,6 +237,7 @@ /* GEM_ISR GEM_IER GEM_IDR GEM_IMR */ #define GEM_INT_TXCMPL 0x00000080 /* Transmit Complete */ +#define GEM_INT_AMBA_ERR 0x00000040 #define GEM_INT_TXUSED 0x00000008 #define GEM_INT_RXUSED 0x00000004 #define GEM_INT_RXCMPL 0x00000002 @@ -345,11 +351,6 @@ static inline unsigned tx_desc_get_last(uint32_t *desc) return (desc[1] & DESC_1_TX_LAST) ? 1 : 0; } -static inline void tx_desc_set_last(uint32_t *desc) -{ - desc[1] |= DESC_1_TX_LAST; -} - static inline unsigned tx_desc_get_length(uint32_t *desc) { return desc[1] & DESC_1_LENGTH; @@ -452,6 +453,34 @@ static inline void rx_desc_set_sar(uint32_t *desc, int sar_idx) /* The broadcast MAC address: 0xFFFFFFFFFFFF */ static const uint8_t broadcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static uint32_t gem_get_max_buf_len(CadenceGEMState *s, bool tx) +{ + uint32_t size; + if (s->regs[GEM_NWCFG] & GEM_NWCFG_JUMBO_FRAME) { + size = s->regs[GEM_JUMBO_MAX_LEN]; + if (size > s->jumbo_max_len) { + size = s->jumbo_max_len; + qemu_log_mask(LOG_GUEST_ERROR, "GEM_JUMBO_MAX_LEN reg cannot be" + " greater than 0x%" PRIx32 "\n", s->jumbo_max_len); + } + } else if (tx) { + size = 1518; + } else { + size = s->regs[GEM_NWCFG] & GEM_NWCFG_RCV_1538 ? 1538 : 1518; + } + return size; +} + +static void gem_set_isr(CadenceGEMState *s, int q, uint32_t flag) +{ + if (q == 0) { + s->regs[GEM_ISR] |= flag & ~(s->regs[GEM_IMR]); + } else { + s->regs[GEM_INT_Q1_STATUS + q - 1] |= flag & + ~(s->regs[GEM_INT_Q1_MASK + q - 1]); + } +} + /* * gem_init_register_masks: * One time initialization. 
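gem_set_isr() above centralises the interrupt-status masking that used to be open-coded at each call site: events for queue 0 land in GEM_ISR filtered by GEM_IMR, while events for queues 1..N-1 land in GEM_INT_Q1_STATUS + q - 1 filtered by the matching GEM_INT_Q1_MASK register, and the simplified gem_update_int_status() then drives one IRQ line per queue. A small usage sketch (the wrapper name is illustrative; the real call sites are in the gem_get_rx_desc()/gem_receive() hunks further down):

static void gem_signal_rx_complete(CadenceGEMState *s, int q)
{
    s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_FRMRCVD;
    gem_set_isr(s, q, GEM_INT_RXCMPL);   /* applies IMR or INT_Q1_MASK[q-1] */
    gem_update_int_status(s);            /* recomputes every s->irq[] level */
}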
@@ -459,6 +488,7 @@ static const uint8_t broadcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; */ static void gem_init_register_masks(CadenceGEMState *s) { + unsigned int i; /* Mask of register bits which are read only */ memset(&s->regs_ro[0], 0, sizeof(s->regs_ro)); s->regs_ro[GEM_NWCTRL] = 0xFFF80000; @@ -471,10 +501,19 @@ static void gem_init_register_masks(CadenceGEMState *s) s->regs_ro[GEM_ISR] = 0xFFFFFFFF; s->regs_ro[GEM_IMR] = 0xFFFFFFFF; s->regs_ro[GEM_MODID] = 0xFFFFFFFF; + for (i = 0; i < s->num_priority_queues; i++) { + s->regs_ro[GEM_INT_Q1_STATUS + i] = 0xFFFFFFFF; + s->regs_ro[GEM_INT_Q1_ENABLE + i] = 0xFFFFF319; + s->regs_ro[GEM_INT_Q1_DISABLE + i] = 0xFFFFF319; + s->regs_ro[GEM_INT_Q1_MASK + i] = 0xFFFFFFFF; + } /* Mask of register bits which are clear on read */ memset(&s->regs_rtc[0], 0, sizeof(s->regs_rtc)); s->regs_rtc[GEM_ISR] = 0xFFFFFFFF; + for (i = 0; i < s->num_priority_queues; i++) { + s->regs_rtc[GEM_INT_Q1_STATUS + i] = 0x00000CE6; + } /* Mask of register bits which are write 1 to clear */ memset(&s->regs_w1c[0], 0, sizeof(s->regs_w1c)); @@ -486,6 +525,10 @@ static void gem_init_register_masks(CadenceGEMState *s) s->regs_wo[GEM_NWCTRL] = 0x00073E60; s->regs_wo[GEM_IER] = 0x07FFFFFF; s->regs_wo[GEM_IDR] = 0x07FFFFFF; + for (i = 0; i < s->num_priority_queues; i++) { + s->regs_wo[GEM_INT_Q1_ENABLE + i] = 0x00000CE6; + s->regs_wo[GEM_INT_Q1_DISABLE + i] = 0x00000CE6; + } } /* @@ -555,29 +598,10 @@ static void gem_update_int_status(CadenceGEMState *s) { int i; - if (!s->regs[GEM_ISR]) { - /* ISR isn't set, clear all the interrupts */ - for (i = 0; i < s->num_priority_queues; ++i) { - qemu_set_irq(s->irq[i], 0); - } - return; - } - - /* If we get here we know s->regs[GEM_ISR] is set, so we don't need to - * check it again. - */ - if (s->num_priority_queues == 1) { - /* No priority queues, just trigger the interrupt */ - DB_PRINT("asserting int.\n"); - qemu_set_irq(s->irq[0], 1); - return; - } + qemu_set_irq(s->irq[0], !!s->regs[GEM_ISR]); - for (i = 0; i < s->num_priority_queues; ++i) { - if (s->regs[GEM_INT_Q1_STATUS + i]) { - DB_PRINT("asserting int. (q=%d)\n", i); - qemu_set_irq(s->irq[i], 1); - } + for (i = 1; i < s->num_priority_queues; ++i) { + qemu_set_irq(s->irq[i], !!s->regs[GEM_INT_Q1_STATUS + i - 1]); } } @@ -679,7 +703,7 @@ static unsigned calc_mac_hash(const uint8_t *mac) static int gem_mac_address_filter(CadenceGEMState *s, const uint8_t *packet) { uint8_t *gem_spaddr; - int i; + int i, is_mc; /* Promiscuous mode? */ if (s->regs[GEM_NWCFG] & GEM_NWCFG_PROMISC) { @@ -695,22 +719,17 @@ static int gem_mac_address_filter(CadenceGEMState *s, const uint8_t *packet) } /* Accept packets -w- hash match? */ - if ((packet[0] == 0x01 && (s->regs[GEM_NWCFG] & GEM_NWCFG_MCAST_HASH)) || - (packet[0] != 0x01 && (s->regs[GEM_NWCFG] & GEM_NWCFG_UCAST_HASH))) { + is_mc = is_multicast_ether_addr(packet); + if ((is_mc && (s->regs[GEM_NWCFG] & GEM_NWCFG_MCAST_HASH)) || + (!is_mc && (s->regs[GEM_NWCFG] & GEM_NWCFG_UCAST_HASH))) { + uint64_t buckets; unsigned hash_index; hash_index = calc_mac_hash(packet); - if (hash_index < 32) { - if (s->regs[GEM_HASHLO] & (1<<hash_index)) { - return packet[0] == 0x01 ? GEM_RX_MULTICAST_HASH_ACCEPT : - GEM_RX_UNICAST_HASH_ACCEPT; - } - } else { - hash_index -= 32; - if (s->regs[GEM_HASHHI] & (1<<hash_index)) { - return packet[0] == 0x01 ? GEM_RX_MULTICAST_HASH_ACCEPT : - GEM_RX_UNICAST_HASH_ACCEPT; - } + buckets = ((uint64_t)s->regs[GEM_HASHHI] << 32) | s->regs[GEM_HASHLO]; + if ((buckets >> hash_index) & 1) { + return is_mc ? 
GEM_RX_MULTICAST_HASH_ACCEPT + : GEM_RX_UNICAST_HASH_ACCEPT; } } @@ -846,6 +865,35 @@ static int get_queue_from_screen(CadenceGEMState *s, uint8_t *rxbuf_ptr, return 0; } +static uint32_t gem_get_queue_base_addr(CadenceGEMState *s, bool tx, int q) +{ + uint32_t base_addr = 0; + + switch (q) { + case 0: + base_addr = s->regs[tx ? GEM_TXQBASE : GEM_RXQBASE]; + break; + case 1 ... (MAX_PRIORITY_QUEUES - 1): + base_addr = s->regs[(tx ? GEM_TRANSMIT_Q1_PTR : + GEM_RECEIVE_Q1_PTR) + q - 1]; + break; + default: + g_assert_not_reached(); + }; + + return base_addr; +} + +static inline uint32_t gem_get_tx_queue_base_addr(CadenceGEMState *s, int q) +{ + return gem_get_queue_base_addr(s, true, q); +} + +static inline uint32_t gem_get_rx_queue_base_addr(CadenceGEMState *s, int q) +{ + return gem_get_queue_base_addr(s, false, q); +} + static hwaddr gem_get_desc_addr(CadenceGEMState *s, bool tx, int q) { hwaddr desc_addr = 0; @@ -883,7 +931,7 @@ static void gem_get_rx_desc(CadenceGEMState *s, int q) if (rx_desc_get_ownership(s->rx_desc[q]) == 1) { DB_PRINT("descriptor 0x%" HWADDR_PRIx " owned by sw.\n", desc_addr); s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF; - s->regs[GEM_ISR] |= GEM_INT_RXUSED & ~(s->regs[GEM_IMR]); + gem_set_isr(s, q, GEM_INT_RXUSED); /* Handle interrupt consequences */ gem_update_int_status(s); } @@ -895,21 +943,18 @@ static void gem_get_rx_desc(CadenceGEMState *s, int q) */ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) { - CadenceGEMState *s; + CadenceGEMState *s = qemu_get_nic_opaque(nc); unsigned rxbufsize, bytes_to_copy; unsigned rxbuf_offset; - uint8_t rxbuf[2048]; uint8_t *rxbuf_ptr; bool first_desc = true; int maf; int q = 0; - s = qemu_get_nic_opaque(nc); - /* Is this destination MAC address "for us" ? */ maf = gem_mac_address_filter(s, buf); if (maf == GEM_RX_REJECT) { - return -1; + return size; /* no, drop siliently b/c it's not an error */ } /* Discard packets with receive length error enabled ? */ @@ -961,29 +1006,35 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) } else { unsigned crc_val; - if (size > sizeof(rxbuf) - sizeof(crc_val)) { - size = sizeof(rxbuf) - sizeof(crc_val); + if (size > MAX_FRAME_SIZE - sizeof(crc_val)) { + size = MAX_FRAME_SIZE - sizeof(crc_val); } bytes_to_copy = size; /* The application wants the FCS field, which QEMU does not provide. * We must try and calculate one. 
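The MAC filter above now treats GEM_HASHHI:GEM_HASHLO as one 64-bit bucket vector indexed by calc_mac_hash()'s 6-bit result, instead of branching on which half the index falls into. A compact restatement of that lookup (sketch only; the parameters stand in for the two 32-bit hash registers):

#include <stdbool.h>
#include <stdint.h>

/* Bit N of the combined vector means "hash index N is accepted".
 * Index 5 lands in the low register's bit 5, index 37 in the high
 * register's bit 5, exactly what the removed if/else spelled out. */
bool hash_bucket_set(uint32_t hash_hi, uint32_t hash_lo, unsigned hash_index)
{
    uint64_t buckets = ((uint64_t)hash_hi << 32) | hash_lo;

    return (buckets >> hash_index) & 1;
}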
*/ - memcpy(rxbuf, buf, size); - memset(rxbuf + size, 0, sizeof(rxbuf) - size); - rxbuf_ptr = rxbuf; - crc_val = cpu_to_le32(crc32(0, rxbuf, MAX(size, 60))); - memcpy(rxbuf + size, &crc_val, sizeof(crc_val)); + memcpy(s->rx_packet, buf, size); + memset(s->rx_packet + size, 0, MAX_FRAME_SIZE - size); + rxbuf_ptr = s->rx_packet; + crc_val = cpu_to_le32(crc32(0, s->rx_packet, MAX(size, 60))); + memcpy(s->rx_packet + size, &crc_val, sizeof(crc_val)); bytes_to_copy += 4; size += 4; } - DB_PRINT("config bufsize: %d packet size: %ld\n", rxbufsize, size); + DB_PRINT("config bufsize: %u packet size: %zd\n", rxbufsize, size); /* Find which queue we are targeting */ q = get_queue_from_screen(s, rxbuf_ptr, rxbufsize); + if (size > gem_get_max_buf_len(s, false)) { + qemu_log_mask(LOG_GUEST_ERROR, "rx frame too long\n"); + gem_set_isr(s, q, GEM_INT_AMBA_ERR); + return -1; + } + while (bytes_to_copy) { hwaddr desc_addr; @@ -992,9 +1043,9 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } - DB_PRINT("copy %u bytes to 0x%" PRIx64 "\n", - MIN(bytes_to_copy, rxbufsize), - rx_desc_get_buffer(s, s->rx_desc[q])); + DB_PRINT("copy %" PRIu32 " bytes to 0x%" PRIx64 "\n", + MIN(bytes_to_copy, rxbufsize), + rx_desc_get_buffer(s, s->rx_desc[q])); /* Copy packet data to emulated DMA buffer */ address_space_write(&s->dma_as, rx_desc_get_buffer(s, s->rx_desc[q]) + @@ -1044,7 +1095,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) /* Next descriptor */ if (rx_desc_get_wrap(s->rx_desc[q])) { DB_PRINT("wrapping RX descriptor list\n"); - s->rx_desc_addr[q] = s->regs[GEM_RXQBASE]; + s->rx_desc_addr[q] = gem_get_rx_queue_base_addr(s, q); } else { DB_PRINT("incrementing RX descriptor list\n"); s->rx_desc_addr[q] += 4 * gem_get_desc_len(s, true); @@ -1057,7 +1108,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) gem_receive_updatestats(s, buf, size); s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_FRMRCVD; - s->regs[GEM_ISR] |= GEM_INT_RXCMPL & ~(s->regs[GEM_IMR]); + gem_set_isr(s, q, GEM_INT_RXCMPL); /* Handle interrupt consequences */ gem_update_int_status(s); @@ -1119,7 +1170,6 @@ static void gem_transmit(CadenceGEMState *s) { uint32_t desc[DESC_MAX_NUM_WORDS]; hwaddr packet_desc_addr; - uint8_t tx_packet[2048]; uint8_t *p; unsigned total_bytes; int q = 0; @@ -1135,7 +1185,7 @@ static void gem_transmit(CadenceGEMState *s) * Packets scattered across multiple descriptors are gathered to this * one contiguous buffer first. 
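Because QEMU's net layer delivers frames without a frame check sequence, the receive path above fabricates one when the guest wants the FCS: the frame is zero-padded to the 60-byte minimum, CRC'd, and the CRC appended little-endian right after the real payload. A standalone sketch of that rule, using zlib's crc32() for illustration (link with -lz; bounds handling is simplified relative to the device model):

#include <stdint.h>
#include <string.h>
#include <zlib.h>

/* Append a fabricated FCS: CRC over the zero-padded, >= 60-byte frame,
 * stored little-endian at offset len.  Returns the new frame length,
 * or the old one if the buffer cannot take four more bytes. */
size_t append_fcs(uint8_t *frame, size_t len, size_t bufsize)
{
    size_t padded = len < 60 ? 60 : len;
    uint32_t crc;

    if (padded + 4 > bufsize) {
        return len;
    }
    memset(frame + len, 0, padded - len);   /* zero-pad short frames */
    crc = (uint32_t)crc32(0L, frame, (uInt)padded);
    frame[len + 0] = crc & 0xff;
    frame[len + 1] = (crc >> 8) & 0xff;
    frame[len + 2] = (crc >> 16) & 0xff;
    frame[len + 3] = (crc >> 24) & 0xff;
    return len + 4;
}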
*/ - p = tx_packet; + p = s->tx_packet; total_bytes = 0; for (q = s->num_priority_queues - 1; q >= 0; q--) { @@ -1160,17 +1210,18 @@ static void gem_transmit(CadenceGEMState *s) */ if ((tx_desc_get_buffer(s, desc) == 0) || (tx_desc_get_length(desc) == 0)) { - DB_PRINT("Invalid TX descriptor @ 0x%x\n", - (unsigned)packet_desc_addr); + DB_PRINT("Invalid TX descriptor @ 0x%" HWADDR_PRIx "\n", + packet_desc_addr); break; } - if (tx_desc_get_length(desc) > sizeof(tx_packet) - - (p - tx_packet)) { - DB_PRINT("TX descriptor @ 0x%" HWADDR_PRIx \ - " too large: size 0x%x space 0x%zx\n", + if (tx_desc_get_length(desc) > gem_get_max_buf_len(s, true) - + (p - s->tx_packet)) { + qemu_log_mask(LOG_GUEST_ERROR, "TX descriptor @ 0x%" \ + HWADDR_PRIx " too large: size 0x%x space 0x%zx\n", packet_desc_addr, tx_desc_get_length(desc), - sizeof(tx_packet) - (p - tx_packet)); + gem_get_max_buf_len(s, true) - (p - s->tx_packet)); + gem_set_isr(s, q, GEM_INT_AMBA_ERR); break; } @@ -1200,7 +1251,7 @@ static void gem_transmit(CadenceGEMState *s) sizeof(desc_first)); /* Advance the hardware current descriptor past this packet */ if (tx_desc_get_wrap(desc)) { - s->tx_desc_addr[q] = s->regs[GEM_TXQBASE]; + s->tx_desc_addr[q] = gem_get_tx_queue_base_addr(s, q); } else { s->tx_desc_addr[q] = packet_desc_addr + 4 * gem_get_desc_len(s, false); @@ -1208,43 +1259,36 @@ static void gem_transmit(CadenceGEMState *s) DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr[q]); s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_TXCMPL; - s->regs[GEM_ISR] |= GEM_INT_TXCMPL & ~(s->regs[GEM_IMR]); - - /* Update queue interrupt status */ - if (s->num_priority_queues > 1) { - s->regs[GEM_INT_Q1_STATUS + q] |= - GEM_INT_TXCMPL & ~(s->regs[GEM_INT_Q1_MASK + q]); - } + gem_set_isr(s, q, GEM_INT_TXCMPL); /* Handle interrupt consequences */ gem_update_int_status(s); /* Is checksum offload enabled? 
*/ if (s->regs[GEM_DMACFG] & GEM_DMACFG_TXCSUM_OFFL) { - net_checksum_calculate(tx_packet, total_bytes); + net_checksum_calculate(s->tx_packet, total_bytes); } /* Update MAC statistics */ - gem_transmit_updatestats(s, tx_packet, total_bytes); + gem_transmit_updatestats(s, s->tx_packet, total_bytes); /* Send the packet somewhere */ if (s->phy_loop || (s->regs[GEM_NWCTRL] & GEM_NWCTRL_LOCALLOOP)) { - gem_receive(qemu_get_queue(s->nic), tx_packet, + gem_receive(qemu_get_queue(s->nic), s->tx_packet, total_bytes); } else { - qemu_send_packet(qemu_get_queue(s->nic), tx_packet, + qemu_send_packet(qemu_get_queue(s->nic), s->tx_packet, total_bytes); } /* Prepare for next packet */ - p = tx_packet; + p = s->tx_packet; total_bytes = 0; } /* read next descriptor */ if (tx_desc_get_wrap(desc)) { - tx_desc_set_last(desc); if (s->regs[GEM_DMACFG] & GEM_DMACFG_ADDR_64B) { packet_desc_addr = s->regs[GEM_TBQPH]; @@ -1252,7 +1296,7 @@ static void gem_transmit(CadenceGEMState *s) } else { packet_desc_addr = 0; } - packet_desc_addr |= s->regs[GEM_TXQBASE]; + packet_desc_addr |= gem_get_tx_queue_base_addr(s, q); } else { packet_desc_addr += 4 * gem_get_desc_len(s, false); } @@ -1264,7 +1308,10 @@ static void gem_transmit(CadenceGEMState *s) if (tx_desc_get_used(desc)) { s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_USED; - s->regs[GEM_ISR] |= GEM_INT_TXUSED & ~(s->regs[GEM_IMR]); + /* IRQ TXUSED is defined only for queue 0 */ + if (q == 0) { + gem_set_isr(s, 0, GEM_INT_TXUSED); + } gem_update_int_status(s); } } @@ -1314,10 +1361,12 @@ static void gem_reset(DeviceState *d) s->regs[GEM_TXPARTIALSF] = 0x000003ff; s->regs[GEM_RXPARTIALSF] = 0x000003ff; s->regs[GEM_MODID] = s->revision; - s->regs[GEM_DESCONF] = 0x02500111; - s->regs[GEM_DESCONF2] = 0x2ab13fff; + s->regs[GEM_DESCONF] = 0x02D00111; + s->regs[GEM_DESCONF2] = 0x2ab10000 | s->jumbo_max_len; s->regs[GEM_DESCONF5] = 0x002f2045; s->regs[GEM_DESCONF6] = GEM_DESCONF6_64B_MASK; + s->regs[GEM_INT_Q1_MASK] = 0x00000CE6; + s->regs[GEM_JUMBO_MAX_LEN] = s->jumbo_max_len; if (s->num_priority_queues > 1) { queues_mask = MAKE_64BIT_MASK(1, s->num_priority_queues - 1); @@ -1458,7 +1507,7 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, if (!(val & GEM_NWCTRL_TXENA)) { /* Reset to start of Q when transmit disabled. */ for (i = 0; i < s->num_priority_queues; i++) { - s->tx_desc_addr[i] = s->regs[GEM_TXQBASE]; + s->tx_desc_addr[i] = gem_get_tx_queue_base_addr(s, i); } } if (gem_can_receive(qemu_get_queue(s->nic))) { @@ -1488,6 +1537,9 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, s->regs[GEM_IMR] &= ~val; gem_update_int_status(s); break; + case GEM_JUMBO_MAX_LEN: + s->regs[GEM_JUMBO_MAX_LEN] = val & MAX_JUMBO_FRAME_SIZE_MASK; + break; case GEM_INT_Q1_ENABLE ... 
GEM_INT_Q7_ENABLE: s->regs[GEM_INT_Q1_MASK + offset - GEM_INT_Q1_ENABLE] &= ~val; gem_update_int_status(s); @@ -1582,6 +1634,12 @@ static void gem_realize(DeviceState *dev, Error **errp) s->nic = qemu_new_nic(&net_gem_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); + + if (s->jumbo_max_len > MAX_FRAME_SIZE) { + error_setg(errp, "jumbo-max-len is greater than %d", + MAX_FRAME_SIZE); + return; + } } static void gem_init(Object *obj) @@ -1630,6 +1688,8 @@ static Property gem_properties[] = { num_type1_screeners, 4), DEFINE_PROP_UINT8("num-type2-screeners", CadenceGEMState, num_type2_screeners, 4), + DEFINE_PROP_UINT16("jumbo-max-len", CadenceGEMState, + jumbo_max_len, 10240), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index d5676871fa..bcd186cac5 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -34,9 +34,9 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "net/net.h" #include "net/tap.h" -#include "hw/hw.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "sysemu/runstate.h" @@ -2816,11 +2816,15 @@ e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val) if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) { if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) { - hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero"); + qemu_log_mask(LOG_GUEST_ERROR, + "e1000e: PSRCTL.BSIZE0 cannot be zero"); + return; } if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) { - hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero"); + qemu_log_mask(LOG_GUEST_ERROR, + "e1000e: PSRCTL.BSIZE1 cannot be zero"); + return; } } diff --git a/hw/net/trace-events b/hw/net/trace-events index 26700dad99..e6875c4c0f 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -381,6 +381,9 @@ virtio_net_announce_notify(void) "" virtio_net_announce_timer(int round) "%d" virtio_net_handle_announce(int round) "%d" virtio_net_post_load_device(void) +virtio_net_rss_disable(void) +virtio_net_rss_error(const char *msg, uint32_t value) "%s, value 0x%08x" +virtio_net_rss_enable(uint32_t p1, uint16_t p2, uint8_t p3) "hashes 0x%x, table of %d, key of %d" # tulip.c tulip_reg_write(uint64_t addr, const char *name, int size, uint64_t val) "addr 0x%02"PRIx64" (%s) size %d value 0x%08"PRIx64 diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 6cefc0add2..4487fd61cf 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -171,9 +171,6 @@ static void tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) len = s->rx_frame_len; } - if (s->rx_frame_len + len > sizeof(s->rx_frame)) { - return; - } pci_dma_write(&s->dev, desc->buf_addr1, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; @@ -186,9 +183,6 @@ static void tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) len = s->rx_frame_len; } - if (s->rx_frame_len + len > sizeof(s->rx_frame)) { - return; - } pci_dma_write(&s->dev, desc->buf_addr2, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; @@ -584,6 +578,9 @@ static int tulip_copy_tx_buffers(TULIPState *s, struct tulip_descriptor *desc) int len2 = (desc->control >> TDES1_BUF2_SIZE_SHIFT) & TDES1_BUF2_SIZE_MASK; if (s->tx_frame_len + len1 > sizeof(s->tx_frame)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: descriptor overflow (ofs: %u, len:%d, size:%zu)\n", + __func__, s->tx_frame_len, len1, sizeof(s->tx_frame)); return -1; } if (len1) { @@ -593,6 +590,9 @@ static int tulip_copy_tx_buffers(TULIPState *s, struct tulip_descriptor *desc) } if (s->tx_frame_len + len2 > sizeof(s->tx_frame)) { + 
qemu_log_mask(LOG_GUEST_ERROR, + "%s: descriptor overflow (ofs: %u, len:%d, size:%zu)\n", + __func__, s->tx_frame_len, len2, sizeof(s->tx_frame)); return -1; } if (len2) { diff --git a/hw/net/tulip.h b/hw/net/tulip.h index 97521b21db..5271aad8d5 100644 --- a/hw/net/tulip.h +++ b/hw/net/tulip.h @@ -211,7 +211,7 @@ #define RDES0_RF BIT(11) #define RDES0_DT_SHIFT 12 #define RDES0_DT_MASK 3 -#define RDES0_LE BIT(14) +#define RDES0_DE BIT(14) #define RDES0_ES BIT(15) #define RDES0_FL_SHIFT 16 #define RDES0_FL_MASK 0x3fff diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index b7f3d1b2eb..aff67a92df 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -42,6 +42,7 @@ #include "trace.h" #include "monitor/qdev.h" #include "hw/pci/pci.h" +#include "net_rx_pkt.h" #define VIRTIO_NET_VM_VERSION 11 @@ -77,25 +78,15 @@ tso/gso/gro 'off'. */ #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 -/* temporary until standard header include it */ -#if !defined(VIRTIO_NET_HDR_F_RSC_INFO) - -#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */ -#define VIRTIO_NET_F_RSC_EXT 61 - -#endif - -static inline __virtio16 *virtio_net_rsc_ext_num_packets( - struct virtio_net_hdr *hdr) -{ - return &hdr->csum_start; -} - -static inline __virtio16 *virtio_net_rsc_ext_num_dupacks( - struct virtio_net_hdr *hdr) -{ - return &hdr->csum_offset; -} +#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ + VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ + VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ + VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ + VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ + VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ + VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ + VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ + VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, @@ -108,6 +99,8 @@ static VirtIOFeature feature_sizes[] = { .end = endof(struct virtio_net_config, mtu)}, {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, .end = endof(struct virtio_net_config, duplex)}, + {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), + .end = endof(struct virtio_net_config, supported_hash_types)}, {} }; @@ -138,6 +131,12 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) memcpy(netcfg.mac, n->mac, ETH_ALEN); virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); netcfg.duplex = n->net_conf.duplex; + netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; + virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, + virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? + VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); + virtio_stl_p(vdev, &netcfg.supported_hash_types, + VIRTIO_NET_RSS_SUPPORTED_HASHES); memcpy(config, &netcfg, n->config_size); } @@ -561,7 +560,7 @@ static int peer_has_ufo(VirtIONet *n) } static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, - int version_1) + int version_1, int hash_report) { int i; NetClientState *nc; @@ -569,7 +568,10 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, n->mergeable_rx_bufs = mergeable_rx_bufs; if (version_1) { - n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + n->guest_hdr_len = hash_report ? + sizeof(struct virtio_net_hdr_v1_hash) : + sizeof(struct virtio_net_hdr_mrg_rxbuf); + n->rss_data.populate_hash = !!hash_report; } else { n->guest_hdr_len = n->mergeable_rx_bufs ? 
sizeof(struct virtio_net_hdr_mrg_rxbuf) : @@ -690,6 +692,8 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); } if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { @@ -701,6 +705,8 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, return features; } + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); features = vhost_net_get_features(get_vhost_net(nc->peer), features); vdev->backend_features = features; @@ -860,18 +866,22 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) } virtio_net_set_multiqueue(n, + virtio_has_feature(features, VIRTIO_NET_F_RSS) || virtio_has_feature(features, VIRTIO_NET_F_MQ)); virtio_net_set_mrg_rx_bufs(n, virtio_has_feature(features, VIRTIO_NET_F_MRG_RXBUF), virtio_has_feature(features, - VIRTIO_F_VERSION_1)); + VIRTIO_F_VERSION_1), + virtio_has_feature(features, + VIRTIO_NET_F_HASH_REPORT)); n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); + n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); if (n->has_vnet_hdr) { n->curr_guest_offloads = @@ -1136,25 +1146,165 @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, } } +static void virtio_net_disable_rss(VirtIONet *n) +{ + if (n->rss_data.enabled) { + trace_virtio_net_rss_disable(); + } + n->rss_data.enabled = false; +} + +static uint16_t virtio_net_handle_rss(VirtIONet *n, + struct iovec *iov, + unsigned int iov_cnt, + bool do_rss) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct virtio_net_rss_config cfg; + size_t s, offset = 0, size_get; + uint16_t queues, i; + struct { + uint16_t us; + uint8_t b; + } QEMU_PACKED temp; + const char *err_msg = ""; + uint32_t err_value = 0; + + if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { + err_msg = "RSS is not negotiated"; + goto error; + } + if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { + err_msg = "Hash report is not negotiated"; + goto error; + } + size_get = offsetof(struct virtio_net_rss_config, indirection_table); + s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); + if (s != size_get) { + err_msg = "Short command buffer"; + err_value = (uint32_t)s; + goto error; + } + n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); + n->rss_data.indirections_len = + virtio_lduw_p(vdev, &cfg.indirection_table_mask); + n->rss_data.indirections_len++; + if (!do_rss) { + n->rss_data.indirections_len = 1; + } + if (!is_power_of_2(n->rss_data.indirections_len)) { + err_msg = "Invalid size of indirection table"; + err_value = n->rss_data.indirections_len; + goto error; + } + if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + err_msg = "Too large indirection table"; + err_value = n->rss_data.indirections_len; + goto error; + } + n->rss_data.default_queue = do_rss ? 
+ virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; + if (n->rss_data.default_queue >= n->max_queues) { + err_msg = "Invalid default queue"; + err_value = n->rss_data.default_queue; + goto error; + } + offset += size_get; + size_get = sizeof(uint16_t) * n->rss_data.indirections_len; + g_free(n->rss_data.indirections_table); + n->rss_data.indirections_table = g_malloc(size_get); + if (!n->rss_data.indirections_table) { + err_msg = "Can't allocate indirections table"; + err_value = n->rss_data.indirections_len; + goto error; + } + s = iov_to_buf(iov, iov_cnt, offset, + n->rss_data.indirections_table, size_get); + if (s != size_get) { + err_msg = "Short indirection table buffer"; + err_value = (uint32_t)s; + goto error; + } + for (i = 0; i < n->rss_data.indirections_len; ++i) { + uint16_t val = n->rss_data.indirections_table[i]; + n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); + } + offset += size_get; + size_get = sizeof(temp); + s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); + if (s != size_get) { + err_msg = "Can't get queues"; + err_value = (uint32_t)s; + goto error; + } + queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues; + if (queues == 0 || queues > n->max_queues) { + err_msg = "Invalid number of queues"; + err_value = queues; + goto error; + } + if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { + err_msg = "Invalid key size"; + err_value = temp.b; + goto error; + } + if (!temp.b && n->rss_data.hash_types) { + err_msg = "No key provided"; + err_value = 0; + goto error; + } + if (!temp.b && !n->rss_data.hash_types) { + virtio_net_disable_rss(n); + return queues; + } + offset += size_get; + size_get = temp.b; + s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); + if (s != size_get) { + err_msg = "Can get key buffer"; + err_value = (uint32_t)s; + goto error; + } + n->rss_data.enabled = true; + trace_virtio_net_rss_enable(n->rss_data.hash_types, + n->rss_data.indirections_len, + temp.b); + return queues; +error: + trace_virtio_net_rss_error(err_msg, err_value); + virtio_net_disable_rss(n); + return 0; +} + static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, struct iovec *iov, unsigned int iov_cnt) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - struct virtio_net_ctrl_mq mq; - size_t s; uint16_t queues; - s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); - if (s != sizeof(mq)) { - return VIRTIO_NET_ERR; - } + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { + queues = virtio_net_handle_rss(n, iov, iov_cnt, false); + return queues ? 
VIRTIO_NET_OK : VIRTIO_NET_ERR; + } + if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { + queues = virtio_net_handle_rss(n, iov, iov_cnt, true); + } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + struct virtio_net_ctrl_mq mq; + size_t s; + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { + return VIRTIO_NET_ERR; + } + s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); + if (s != sizeof(mq)) { + return VIRTIO_NET_ERR; + } + queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); - if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + } else { return VIRTIO_NET_ERR; } - queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); - if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || queues > n->max_queues || @@ -1387,8 +1537,107 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) return 0; } +static uint8_t virtio_net_get_hash_type(bool isip4, + bool isip6, + bool isudp, + bool istcp, + uint32_t types) +{ + if (isip4) { + if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { + return NetPktRssIpV4Tcp; + } + if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { + return NetPktRssIpV4Udp; + } + if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { + return NetPktRssIpV4; + } + } else if (isip6) { + uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | + VIRTIO_NET_RSS_HASH_TYPE_TCPv6; + + if (istcp && (types & mask)) { + return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? + NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; + } + mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; + if (isudp && (types & mask)) { + return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? + NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; + } + mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; + if (types & mask) { + return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? + NetPktRssIpV6Ex : NetPktRssIpV6; + } + } + return 0xff; +} + +static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, + uint32_t hash) +{ + struct virtio_net_hdr_v1_hash *hdr = (void *)buf; + hdr->hash_value = hash; + hdr->hash_report = report; +} + +static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + unsigned int index = nc->queue_index, new_index = index; + struct NetRxPkt *pkt = n->rx_pkt; + uint8_t net_hash_type; + uint32_t hash; + bool isip4, isip6, isudp, istcp; + static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { + VIRTIO_NET_HASH_REPORT_IPv4, + VIRTIO_NET_HASH_REPORT_TCPv4, + VIRTIO_NET_HASH_REPORT_TCPv6, + VIRTIO_NET_HASH_REPORT_IPv6, + VIRTIO_NET_HASH_REPORT_IPv6_EX, + VIRTIO_NET_HASH_REPORT_TCPv6_EX, + VIRTIO_NET_HASH_REPORT_UDPv4, + VIRTIO_NET_HASH_REPORT_UDPv6, + VIRTIO_NET_HASH_REPORT_UDPv6_EX + }; + + net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, + size - n->host_hdr_len); + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { + istcp = isudp = false; + } + if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { + istcp = isudp = false; + } + net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, + n->rss_data.hash_types); + if (net_hash_type > NetPktRssIpV6UdpEx) { + if (n->rss_data.populate_hash) { + virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); + } + return n->rss_data.redirect ? 
n->rss_data.default_queue : -1; + } + + hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); + + if (n->rss_data.populate_hash) { + virtio_set_packet_hash(buf, reports[net_hash_type], hash); + } + + if (n->rss_data.redirect) { + new_index = hash & (n->rss_data.indirections_len - 1); + new_index = n->rss_data.indirections_table[new_index]; + } + + return (index == new_index) ? -1 : new_index; +} + static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, - size_t size) + size_t size, bool no_rss) { VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q = virtio_net_get_subqueue(nc); @@ -1402,6 +1651,14 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, return -1; } + if (!no_rss && n->rss_data.enabled) { + int index = virtio_net_process_rss(nc, buf, size); + if (index >= 0) { + NetClientState *nc2 = qemu_get_subqueue(n->nic, index); + return virtio_net_receive_rcu(nc2, buf, size, true); + } + } + /* hdr_len refers to the header we supply to the guest */ if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { return 0; @@ -1452,6 +1709,11 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } receive_header(n, sg, elem->in_num, buf, size); + if (n->rss_data.populate_hash) { + offset = sizeof(mhdr); + iov_from_buf(sg, elem->in_num, offset, + buf + offset, n->host_hdr_len - sizeof(mhdr)); + } offset = n->host_hdr_len; total += n->guest_hdr_len; guest_offset = n->guest_hdr_len; @@ -1496,7 +1758,7 @@ static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, { RCU_READ_LOCK_GUARD(); - return virtio_net_receive_rcu(nc, buf, size); + return virtio_net_receive_rcu(nc, buf, size, false); } static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, @@ -1539,15 +1801,15 @@ static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, VirtioNetRscSeg *seg) { int ret; - struct virtio_net_hdr *h; + struct virtio_net_hdr_v1 *h; - h = (struct virtio_net_hdr *)seg->buf; + h = (struct virtio_net_hdr_v1 *)seg->buf; h->flags = 0; h->gso_type = VIRTIO_NET_HDR_GSO_NONE; if (seg->is_coalesced) { - *virtio_net_rsc_ext_num_packets(h) = seg->packets; - *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack; + h->rsc.segments = seg->packets; + h->rsc.dup_acks = seg->dup_ack; h->flags = VIRTIO_NET_HDR_F_RSC_INFO; if (chain->proto == ETH_P_IP) { h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; @@ -2444,7 +2706,9 @@ static int virtio_net_post_load_device(void *opaque, int version_id) trace_virtio_net_post_load_device(); virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, virtio_vdev_has_feature(vdev, - VIRTIO_F_VERSION_1)); + VIRTIO_F_VERSION_1), + virtio_vdev_has_feature(vdev, + VIRTIO_NET_F_HASH_REPORT)); /* MAC_TABLE_ENTRIES may be different from the saved image */ if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { @@ -2493,6 +2757,13 @@ static int virtio_net_post_load_device(void *opaque, int version_id) } } + if (n->rss_data.enabled) { + trace_virtio_net_rss_enable(n->rss_data.hash_types, + n->rss_data.indirections_len, + sizeof(n->rss_data.key)); + } else { + trace_virtio_net_rss_disable(); + } return 0; } @@ -2670,6 +2941,32 @@ static const VMStateDescription vmstate_virtio_net_has_vnet = { }, }; +static bool virtio_net_rss_needed(void *opaque) +{ + return VIRTIO_NET(opaque)->rss_data.enabled; +} + +static const VMStateDescription vmstate_virtio_net_rss = { + .name = "virtio-net-device/rss", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_net_rss_needed, + .fields = 
(VMStateField[]) { + VMSTATE_BOOL(rss_data.enabled, VirtIONet), + VMSTATE_BOOL(rss_data.redirect, VirtIONet), + VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), + VMSTATE_UINT32(rss_data.hash_types, VirtIONet), + VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), + VMSTATE_UINT16(rss_data.default_queue, VirtIONet), + VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, + VIRTIO_NET_RSS_MAX_KEY_SIZE), + VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, + rss_data.indirections_len, 0, + vmstate_info_uint16, uint16_t), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_virtio_net_device = { .name = "virtio-net-device", .version_id = VIRTIO_NET_VM_VERSION, @@ -2720,6 +3017,10 @@ static const VMStateDescription vmstate_virtio_net_device = { has_ctrl_guest_offloads), VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription * []) { + &vmstate_virtio_net_rss, + NULL + } }; static NetClientInfo net_virtio_info = { @@ -3063,7 +3364,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->vqs[0].tx_waiting = 0; n->tx_burst = n->net_conf.txburst; - virtio_net_set_mrg_rx_bufs(n, 0, 0); + virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); n->promisc = 1; /* for compatibility */ n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); @@ -3075,6 +3376,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) QTAILQ_INIT(&n->rsc_chains); n->qdev = dev; + + net_rx_pkt_init(&n->rx_pkt, false); } static void virtio_net_device_unrealize(DeviceState *dev) @@ -3111,6 +3414,8 @@ static void virtio_net_device_unrealize(DeviceState *dev) g_free(n->vqs); qemu_del_nic(n->nic); virtio_net_rsc_cleanup(n); + g_free(n->rss_data.indirections_table); + net_rx_pkt_uninit(n->rx_pkt); virtio_cleanup(vdev); } @@ -3212,6 +3517,10 @@ static Property virtio_net_properties[] = { DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), + DEFINE_PROP_BIT64("rss", VirtIONet, host_features, + VIRTIO_NET_F_RSS, false), + DEFINE_PROP_BIT64("hash", VirtIONet, host_features, + VIRTIO_NET_F_HASH_REPORT, false), DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, VIRTIO_NET_F_RSC_EXT, false), DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 387503e11b..8ce68a9dd6 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2346,12 +2346,7 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } - blkconf_blocksizes(&s->qdev.conf); - - if (s->qdev.conf.logical_block_size > - s->qdev.conf.physical_block_size) { - error_setg(errp, - "logical_block_size > physical_block_size not supported"); + if (!blkconf_blocksizes(&s->qdev.conf, errp)) { return; } @@ -2436,7 +2431,9 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) if (s->qdev.conf.blk) { ctx = blk_get_aio_context(s->qdev.conf.blk); aio_context_acquire(ctx); - blkconf_blocksizes(&s->qdev.conf); + if (!blkconf_blocksizes(&s->qdev.conf, errp)) { + goto out; + } } s->qdev.blocksize = s->qdev.conf.logical_block_size; s->qdev.type = TYPE_DISK; @@ -2444,6 +2441,7 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) s->product = g_strdup("QEMU HARDDISK"); } scsi_realize(&s->qdev, errp); +out: if (ctx) { aio_context_release(ctx); } diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index a5204b6f2a..f5977eb72e 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ 
-612,7 +612,10 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) return; } - blkconf_blocksizes(&s->conf); + if (!blkconf_blocksizes(&s->conf, errp)) { + return; + } + if (!blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, errp)) { return; diff --git a/include/block/nvme.h b/include/block/nvme.h index 5525c8e343..1720ee1d51 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -705,6 +705,14 @@ typedef struct NvmePSD { uint8_t resv[16]; } NvmePSD; +#define NVME_IDENTIFY_DATA_SIZE 4096 + +enum { + NVME_ID_CNS_NS = 0x0, + NVME_ID_CNS_CTRL = 0x1, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x2, +}; + typedef struct NvmeIdCtrl { uint16_t vid; uint16_t ssvid; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index d7246f3862..1e8b6253dd 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -18,9 +18,9 @@ typedef struct BlockConf { BlockBackend *blk; - uint16_t physical_block_size; - uint16_t logical_block_size; - uint16_t min_io_size; + uint32_t physical_block_size; + uint32_t logical_block_size; + uint32_t min_io_size; uint32_t opt_io_size; int32_t bootindex; uint32_t discard_granularity; @@ -51,9 +51,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ _conf.physical_block_size), \ - DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), \ - DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ - DEFINE_PROP_UINT32("discard_granularity", _state, \ + DEFINE_PROP_SIZE32("min_io_size", _state, _conf.min_io_size, 0), \ + DEFINE_PROP_SIZE32("opt_io_size", _state, _conf.opt_io_size, 0), \ + DEFINE_PROP_SIZE32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ ON_OFF_AUTO_AUTO), \ @@ -87,7 +87,7 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size, bool blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); -void blkconf_blocksizes(BlockConf *conf); +bool blkconf_blocksizes(BlockConf *conf, Error **errp); bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, bool resizable, Error **errp); diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index ba68d1f22b..fd34b78e0d 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -26,7 +26,7 @@ #include "hw/i386/x86.h" /* Platform virtio definitions */ -#define VIRTIO_MMIO_BASE 0xc0000000 +#define VIRTIO_MMIO_BASE 0xfeb00000 #define VIRTIO_IRQ_BASE 5 #define VIRTIO_NUM_TRANSPORTS 8 #define VIRTIO_CMDLINE_MAXLEN 64 diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 8d764f965c..e6135c34d6 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -35,6 +35,7 @@ struct PCMachineState { PFlashCFI01 *flash[2]; /* Configuration options: */ + uint64_t max_ram_below_4g; OnOffAuto vmport; bool acpi_build_enabled; @@ -51,6 +52,7 @@ struct PCMachineState { }; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" #define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size" #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMBUS "smbus" diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index b522854816..b79f24e285 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -51,9 +51,6 @@ typedef struct { qemu_irq *gsi; GMappedFile *initrd_mapped_file; - /* Configuration options: 
*/ - uint64_t max_ram_below_4g; - /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -82,7 +79,6 @@ typedef struct { AddressSpace *ioapic_as; } X86MachineState; -#define X86_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" #define X86_MACHINE_SMM "smm" #define X86_MACHINE_ACPI "acpi" diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 55da35d768..1a7869e85d 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -322,12 +322,12 @@ typedef enum { IDE_HD, IDE_CD, IDE_CFATA } IDEDriveKind; typedef void EndTransferFunc(IDEState *); -typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *); -typedef void DMAVoidFunc(IDEDMA *); -typedef int DMAIntFunc(IDEDMA *, bool); -typedef int32_t DMAInt32Func(IDEDMA *, int32_t len); -typedef void DMAu32Func(IDEDMA *, uint32_t); -typedef void DMAStopFunc(IDEDMA *, bool); +typedef void DMAStartFunc(const IDEDMA *, IDEState *, BlockCompletionFunc *); +typedef void DMAVoidFunc(const IDEDMA *); +typedef int DMAIntFunc(const IDEDMA *, bool); +typedef int32_t DMAInt32Func(const IDEDMA *, int32_t len); +typedef void DMAu32Func(const IDEDMA *, uint32_t); +typedef void DMAStopFunc(const IDEDMA *, bool); struct unreported_events { bool eject_request; diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h index 5c83036ade..54e646ff79 100644 --- a/include/hw/net/cadence_gem.h +++ b/include/hw/net/cadence_gem.h @@ -40,6 +40,9 @@ #define MAX_TYPE1_SCREENERS 16 #define MAX_TYPE2_SCREENERS 16 +#define MAX_JUMBO_FRAME_SIZE_MASK 0x3FFF +#define MAX_FRAME_SIZE MAX_JUMBO_FRAME_SIZE_MASK + typedef struct CadenceGEMState { /*< private >*/ SysBusDevice parent_obj; @@ -57,6 +60,7 @@ typedef struct CadenceGEMState { uint8_t num_type1_screeners; uint8_t num_type2_screeners; uint32_t revision; + uint16_t jumbo_max_len; /* GEM registers backing store */ uint32_t regs[CADENCE_GEM_MAXREG]; @@ -80,6 +84,8 @@ typedef struct CadenceGEMState { uint8_t can_rx_state; /* Debug only */ + uint8_t tx_packet[MAX_FRAME_SIZE]; + uint8_t rx_packet[MAX_FRAME_SIZE]; uint32_t rx_desc[MAX_PRIORITY_QUEUES][DESC_MAX_NUM_WORDS]; bool sar_active[4]; diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index f161604fb6..5252bb6b1a 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -29,6 +29,7 @@ extern const PropertyInfo qdev_prop_drive; extern const PropertyInfo qdev_prop_drive_iothread; extern const PropertyInfo qdev_prop_netdev; extern const PropertyInfo qdev_prop_pci_devfn; +extern const PropertyInfo qdev_prop_size32; extern const PropertyInfo qdev_prop_blocksize; extern const PropertyInfo qdev_prop_pci_host_devaddr; extern const PropertyInfo qdev_prop_uuid; @@ -196,8 +197,10 @@ extern const PropertyInfo qdev_prop_pcie_link_width; BlockdevOnError) #define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int) +#define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) #define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ - DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint16_t) + DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint32_t) #define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) #define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 1e62f869b2..b1334c3904 
100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -86,5 +86,6 @@ typedef struct MultiReqBuffer { } MultiReqBuffer; bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); #endif diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 96c68d4a92..a45ef8278e 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -126,6 +126,20 @@ typedef struct VirtioNetRscChain { /* Maximum packet size we can receive from tap device: header + 64k */ #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 * KiB)) +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 + +typedef struct VirtioNetRssData { + bool enabled; + bool redirect; + bool populate_hash; + uint32_t hash_types; + uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + uint16_t indirections_len; + uint16_t *indirections_table; + uint16_t default_queue; +} VirtioNetRssData; + typedef struct VirtIONetQueue { VirtQueue *rx_vq; VirtQueue *tx_vq; @@ -199,6 +213,8 @@ struct VirtIONet { bool failover; DeviceListener primary_listener; Notifier migration_state; + VirtioNetRssData rss_data; + struct NetRxPkt *rx_pkt; }; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index eafa39f560..f68ed7db13 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -432,6 +432,16 @@ extern const VMStateInfo vmstate_info_qlist; .offset = vmstate_offset_pointer(_state, _field, _type), \ } +#define VMSTATE_VARRAY_UINT16_ALLOC(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT16 | VMS_POINTER | VMS_ALLOC, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + #define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\ .name = (stringify(_field)), \ .version_id = (_version), \ diff --git a/migration/multifd.c b/migration/multifd.c index 5a3e4d0d46..d0441202aa 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -415,6 +415,12 @@ static int multifd_send_pages(QEMUFile *f) } qemu_sem_wait(&multifd_send_state->channels_ready); + /* + * next_channel can remain from a previous migration that was + * using more channels, so ensure it doesn't overflow if the + * limit is lower now. 
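The indirections_table and key declared in VirtioNetRssData above are what the RSS receive path consumes: the calculated hash is masked with indirections_len - 1 (the control command already rejects lengths that are not a power of two, and caps them at 128) and the selected slot names the destination queue. A minimal restatement of the lookup; in the real code the hash comes from net_rx_pkt_calc_rss_hash() and is just a parameter here.

#include <stdint.h>

/* indirections_len is validated to be a power of two, so masking with
 * len - 1 selects a slot; the slot holds the target queue index. */
uint16_t rss_pick_queue(uint32_t hash, const uint16_t *indirections_table,
                        uint16_t indirections_len)
{
    return indirections_table[hash & (indirections_len - 1)];
}

virtio_net_process_rss() then returns -1 when the picked queue is the one the packet already arrived on, so the common case costs no re-dispatch.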
+ */ + next_channel %= migrate_multifd_channels(); for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { p = &multifd_send_state->params[i]; diff --git a/migration/ram.c b/migration/ram.c index 41cc530d9d..069b6e30bc 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -913,10 +913,8 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) * TARGET_PAGE_SIZE; encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev; - if (xbzrle_counters.pages == rs->xbzrle_pages_prev) { + if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) { xbzrle_counters.encoding_rate = 0; - } else if (!encoded_size) { - xbzrle_counters.encoding_rate = UINT64_MAX; } else { xbzrle_counters.encoding_rate = unencoded_size / encoded_size; } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index e03adf0d4d..2b0b58a336 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -299,7 +299,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->bytes >> 10); monitor_printf(mon, "xbzrle pages: %" PRIu64 " pages\n", info->xbzrle_cache->pages); - monitor_printf(mon, "xbzrle cache miss: %" PRIu64 "\n", + monitor_printf(mon, "xbzrle cache miss: %" PRIu64 " pages\n", info->xbzrle_cache->cache_miss); monitor_printf(mon, "xbzrle cache miss rate: %0.2f\n", info->xbzrle_cache->cache_miss_rate); @@ -316,8 +316,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->compression->busy); monitor_printf(mon, "compression busy rate: %0.2f\n", info->compression->busy_rate); - monitor_printf(mon, "compressed size: %" PRIu64 "\n", - info->compression->compressed_size); + monitor_printf(mon, "compressed size: %" PRIu64 " kbytes\n", + info->compression->compressed_size >> 10); monitor_printf(mon, "compression rate: %0.2f\n", info->compression->compression_rate); } @@ -443,11 +443,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) MigrationParameter_str(MIGRATION_PARAMETER_MAX_BANDWIDTH), params->max_bandwidth); assert(params->has_downtime_limit); - monitor_printf(mon, "%s: %" PRIu64 " milliseconds\n", + monitor_printf(mon, "%s: %" PRIu64 " ms\n", MigrationParameter_str(MIGRATION_PARAMETER_DOWNTIME_LIMIT), params->downtime_limit); assert(params->has_x_checkpoint_delay); - monitor_printf(mon, "%s: %u\n", + monitor_printf(mon, "%s: %u ms\n", MigrationParameter_str(MIGRATION_PARAMETER_X_CHECKPOINT_DELAY), params->x_checkpoint_delay); assert(params->has_block_incremental); @@ -460,7 +460,7 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %s\n", MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION), MultiFDCompression_str(params->multifd_compression)); - monitor_printf(mon, "%s: %" PRIu64 "\n", + monitor_printf(mon, "%s: %" PRIu64 " bytes\n", MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE), params->xbzrle_cache_size); monitor_printf(mon, "%s: %" PRIu64 "\n", @@ -1189,8 +1189,11 @@ void hmp_migrate_pause(Monitor *mon, const QDict *qdict) /* Kept for backwards compatibility */ void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict) { + Error *err = NULL; + double value = qdict_get_double(qdict, "value"); - qmp_migrate_set_downtime(value, NULL); + qmp_migrate_set_downtime(value, &err); + hmp_handle_error(mon, err); } void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict) @@ -1499,8 +1502,7 @@ void hmp_change(Monitor *mon, const QDict *qdict) read_only, BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN, 
&err); if (err) { - hmp_handle_error(mon, err); - return; + goto end; } } @@ -1509,6 +1511,7 @@ void hmp_change(Monitor *mon, const QDict *qdict) &err); } +end: hmp_handle_error(mon, err); } @@ -1627,16 +1630,15 @@ void hmp_object_add(Monitor *mon, const QDict *qdict) opts = qemu_opts_from_qdict(qemu_find_opts("object"), qdict, &err); if (err) { - hmp_handle_error(mon, err); - return; + goto end; } obj = user_creatable_add_opts(opts, &err); qemu_opts_del(opts); - if (err) { - hmp_handle_error(mon, err); - } +end: + hmp_handle_error(mon, err); + if (obj) { object_unref(obj); } diff --git a/net/colo-compare.c b/net/colo-compare.c index c07e7c1c09..f15779dedc 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -32,6 +32,9 @@ #include "migration/migration.h" #include "util.h" +#include "block/aio-wait.h" +#include "qemu/coroutine.h" + #define TYPE_COLO_COMPARE "colo-compare" #define COLO_COMPARE(obj) \ OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) @@ -51,6 +54,8 @@ static NotifierList colo_compare_notifiers = #define REGULAR_PACKET_CHECK_MS 3000 #define DEFAULT_TIME_OUT_MS 3000 +static QemuMutex colo_compare_mutex; +static bool colo_compare_active; static QemuMutex event_mtx; static QemuCond event_complete_cond; static int event_unhandled_count; @@ -77,6 +82,23 @@ static int event_unhandled_count; * |packet | |packet + |packet | |packet + * +--------+ +--------+ +--------+ +--------+ */ + +typedef struct SendCo { + Coroutine *co; + struct CompareState *s; + CharBackend *chr; + GQueue send_list; + bool notify_remote_frame; + bool done; + int ret; +} SendCo; + +typedef struct SendEntry { + uint32_t size; + uint32_t vnet_hdr_len; + uint8_t *buf; +} SendEntry; + typedef struct CompareState { Object parent; @@ -91,6 +113,8 @@ typedef struct CompareState { SocketReadState pri_rs; SocketReadState sec_rs; SocketReadState notify_rs; + SendCo out_sendco; + SendCo notify_sendco; bool vnet_hdr; uint32_t compare_timeout; uint32_t expired_scan_cycle; @@ -122,12 +146,17 @@ enum { SECONDARY_IN, }; +static const char *colo_mode[] = { + [PRIMARY_IN] = "primary", + [SECONDARY_IN] = "secondary", +}; static int compare_chr_send(CompareState *s, - const uint8_t *buf, + uint8_t *buf, uint32_t size, uint32_t vnet_hdr_len, - bool notify_remote_frame); + bool notify_remote_frame, + bool zero_copy); static bool packet_matches_str(const char *str, const uint8_t *buf, @@ -145,7 +174,7 @@ static void notify_remote_frame(CompareState *s) char msg[] = "DO_CHECKPOINT"; int ret = 0; - ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true); + ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, false); if (ret < 0) { error_report("Notify Xen COLO-frame failed"); } @@ -217,6 +246,7 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) ConnectionKey key; Packet *pkt = NULL; Connection *conn; + int ret; if (mode == PRIMARY_IN) { pkt = packet_new(s->pri_rs.buf, @@ -245,16 +275,18 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) } if (mode == PRIMARY_IN) { - if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) { - error_report("colo compare primary queue size too big," - "drop packet"); - } + ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack); } else { - if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) { - error_report("colo compare secondary queue size too big," - "drop packet"); - } + ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack); } + + if (!ret) { + 
trace_colo_compare_drop_packet(colo_mode[mode], + "queue size too big, drop packet"); + packet_destroy(pkt, NULL); + pkt = NULL; + } + *con = conn; return 0; @@ -272,12 +304,13 @@ static void colo_release_primary_pkt(CompareState *s, Packet *pkt) pkt->data, pkt->size, pkt->vnet_hdr_len, - false); + false, + true); if (ret < 0) { error_report("colo send primary packet failed"); } trace_colo_compare_main("packet same and release packet"); - packet_destroy(pkt, NULL); + packet_destroy_partial(pkt, NULL); } /* @@ -459,10 +492,12 @@ sec: g_queue_push_head(&conn->primary_list, ppkt); g_queue_push_head(&conn->secondary_list, spkt); - qemu_hexdump((char *)ppkt->data, stderr, - "colo-compare ppkt", ppkt->size); - qemu_hexdump((char *)spkt->data, stderr, - "colo-compare spkt", spkt->size); + if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { + qemu_hexdump((char *)ppkt->data, stderr, + "colo-compare ppkt", ppkt->size); + qemu_hexdump((char *)spkt->data, stderr, + "colo-compare spkt", spkt->size); + } colo_compare_inconsistency_notify(s); } @@ -699,65 +734,115 @@ static void colo_compare_connection(void *opaque, void *user_data) } } -static int compare_chr_send(CompareState *s, - const uint8_t *buf, - uint32_t size, - uint32_t vnet_hdr_len, - bool notify_remote_frame) +static void coroutine_fn _compare_chr_send(void *opaque) { + SendCo *sendco = opaque; + CompareState *s = sendco->s; int ret = 0; - uint32_t len = htonl(size); - if (!size) { - return 0; - } + while (!g_queue_is_empty(&sendco->send_list)) { + SendEntry *entry = g_queue_pop_tail(&sendco->send_list); + uint32_t len = htonl(entry->size); - if (notify_remote_frame) { - ret = qemu_chr_fe_write_all(&s->chr_notify_dev, - (uint8_t *)&len, - sizeof(len)); - } else { - ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); - } + ret = qemu_chr_fe_write_all(sendco->chr, (uint8_t *)&len, sizeof(len)); - if (ret != sizeof(len)) { - goto err; - } + if (ret != sizeof(len)) { + g_free(entry->buf); + g_slice_free(SendEntry, entry); + goto err; + } - if (s->vnet_hdr) { - /* - * We send vnet header len make other module(like filter-redirector) - * know how to parse net packet correctly. - */ - len = htonl(vnet_hdr_len); + if (!sendco->notify_remote_frame && s->vnet_hdr) { + /* + * We send vnet header len make other module(like filter-redirector) + * know how to parse net packet correctly. + */ + len = htonl(entry->vnet_hdr_len); - if (!notify_remote_frame) { - ret = qemu_chr_fe_write_all(&s->chr_out, + ret = qemu_chr_fe_write_all(sendco->chr, (uint8_t *)&len, sizeof(len)); + + if (ret != sizeof(len)) { + g_free(entry->buf); + g_slice_free(SendEntry, entry); + goto err; + } } - if (ret != sizeof(len)) { + ret = qemu_chr_fe_write_all(sendco->chr, + (uint8_t *)entry->buf, + entry->size); + + if (ret != entry->size) { + g_free(entry->buf); + g_slice_free(SendEntry, entry); goto err; } + + g_free(entry->buf); + g_slice_free(SendEntry, entry); + } + + sendco->ret = 0; + goto out; + +err: + while (!g_queue_is_empty(&sendco->send_list)) { + SendEntry *entry = g_queue_pop_tail(&sendco->send_list); + g_free(entry->buf); + g_slice_free(SendEntry, entry); } + sendco->ret = ret < 0 ? 
ret : -EIO; +out: + sendco->co = NULL; + sendco->done = true; + aio_wait_kick(); +} + +static int compare_chr_send(CompareState *s, + uint8_t *buf, + uint32_t size, + uint32_t vnet_hdr_len, + bool notify_remote_frame, + bool zero_copy) +{ + SendCo *sendco; + SendEntry *entry; if (notify_remote_frame) { - ret = qemu_chr_fe_write_all(&s->chr_notify_dev, - (uint8_t *)buf, - size); + sendco = &s->notify_sendco; } else { - ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); + sendco = &s->out_sendco; } - if (ret != size) { - goto err; + if (!size) { + return 0; } - return 0; + entry = g_slice_new(SendEntry); + entry->size = size; + entry->vnet_hdr_len = vnet_hdr_len; + if (zero_copy) { + entry->buf = buf; + } else { + entry->buf = g_malloc(size); + memcpy(entry->buf, buf, size); + } + g_queue_push_head(&sendco->send_list, entry); + + if (sendco->done) { + sendco->co = qemu_coroutine_create(_compare_chr_send, sendco); + sendco->done = false; + qemu_coroutine_enter(sendco->co); + if (sendco->done) { + /* report early errors */ + return sendco->ret; + } + } -err: - return ret < 0 ? ret : -EIO; + /* assume success */ + return 0; } static int compare_chr_can_read(void *opaque) @@ -830,6 +915,12 @@ static void check_old_packet_regular(void *opaque) void colo_notify_compares_event(void *opaque, int event, Error **errp) { CompareState *s; + qemu_mutex_lock(&colo_compare_mutex); + + if (!colo_compare_active) { + qemu_mutex_unlock(&colo_compare_mutex); + return; + } qemu_mutex_lock(&event_mtx); QTAILQ_FOREACH(s, &net_compares, next) { @@ -843,6 +934,7 @@ void colo_notify_compares_event(void *opaque, int event, Error **errp) } qemu_mutex_unlock(&event_mtx); + qemu_mutex_unlock(&colo_compare_mutex); } static void colo_compare_timer_init(CompareState *s) @@ -890,6 +982,7 @@ static void colo_compare_handle_event(void *opaque) static void colo_compare_iothread(CompareState *s) { + AioContext *ctx = iothread_get_aio_context(s->iothread); object_ref(OBJECT(s->iothread)); s->worker_context = iothread_get_g_main_context(s->iothread); @@ -906,7 +999,7 @@ static void colo_compare_iothread(CompareState *s) } colo_compare_timer_init(s); - s->event_bh = qemu_bh_new(colo_compare_handle_event, s); + s->event_bh = aio_bh_new(ctx, colo_compare_handle_event, s); } static char *compare_get_pri_indev(Object *obj, Error **errp) @@ -1062,6 +1155,7 @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs) pri_rs->buf, pri_rs->packet_len, pri_rs->vnet_hdr_len, + false, false); } else { /* compare packet in the specified connection */ @@ -1092,7 +1186,7 @@ static void compare_notify_rs_finalize(SocketReadState *notify_rs) if (packet_matches_str("COLO_USERSPACE_PROXY_INIT", notify_rs->buf, notify_rs->packet_len)) { - ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true); + ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, false); if (ret < 0) { error_report("Notify Xen COLO-frame INIT failed"); } @@ -1196,12 +1290,21 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) s->vnet_hdr); } - QTAILQ_INSERT_TAIL(&net_compares, s, next); + s->out_sendco.s = s; + s->out_sendco.chr = &s->chr_out; + s->out_sendco.notify_remote_frame = false; + s->out_sendco.done = true; + g_queue_init(&s->out_sendco.send_list); - g_queue_init(&s->conn_list); + if (s->notify_dev) { + s->notify_sendco.s = s; + s->notify_sendco.chr = &s->chr_notify_dev; + s->notify_sendco.notify_remote_frame = true; + s->notify_sendco.done = true; + g_queue_init(&s->notify_sendco.send_list); + } - 
qemu_mutex_init(&event_mtx); - qemu_cond_init(&event_complete_cond); + g_queue_init(&s->conn_list); s->connection_track_table = g_hash_table_new_full(connection_key_hash, connection_key_equal, @@ -1209,6 +1312,16 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) connection_destroy); colo_compare_iothread(s); + + qemu_mutex_lock(&colo_compare_mutex); + if (!colo_compare_active) { + qemu_mutex_init(&event_mtx); + qemu_cond_init(&event_complete_cond); + colo_compare_active = true; + } + QTAILQ_INSERT_TAIL(&net_compares, s, next); + qemu_mutex_unlock(&colo_compare_mutex); + return; } @@ -1224,8 +1337,9 @@ static void colo_flush_packets(void *opaque, void *user_data) pkt->data, pkt->size, pkt->vnet_hdr_len, - false); - packet_destroy(pkt, NULL); + false, + true); + packet_destroy_partial(pkt, NULL); } while (!g_queue_is_empty(&conn->secondary_list)) { pkt = g_queue_pop_head(&conn->secondary_list); @@ -1276,6 +1390,20 @@ static void colo_compare_finalize(Object *obj) CompareState *s = COLO_COMPARE(obj); CompareState *tmp = NULL; + qemu_mutex_lock(&colo_compare_mutex); + QTAILQ_FOREACH(tmp, &net_compares, next) { + if (tmp == s) { + QTAILQ_REMOVE(&net_compares, s, next); + break; + } + } + if (QTAILQ_EMPTY(&net_compares)) { + colo_compare_active = false; + qemu_mutex_destroy(&event_mtx); + qemu_cond_destroy(&event_complete_cond); + } + qemu_mutex_unlock(&colo_compare_mutex); + qemu_chr_fe_deinit(&s->chr_pri_in, false); qemu_chr_fe_deinit(&s->chr_sec_in, false); qemu_chr_fe_deinit(&s->chr_out, false); @@ -1289,17 +1417,23 @@ static void colo_compare_finalize(Object *obj) qemu_bh_delete(s->event_bh); - QTAILQ_FOREACH(tmp, &net_compares, next) { - if (tmp == s) { - QTAILQ_REMOVE(&net_compares, s, next); - break; - } + AioContext *ctx = iothread_get_aio_context(s->iothread); + aio_context_acquire(ctx); + AIO_WAIT_WHILE(ctx, !s->out_sendco.done); + if (s->notify_dev) { + AIO_WAIT_WHILE(ctx, !s->notify_sendco.done); } + aio_context_release(ctx); /* Release all unhandled packets after compare thead exited */ g_queue_foreach(&s->conn_list, colo_flush_packets, s); + AIO_WAIT_WHILE(NULL, !s->out_sendco.done); g_queue_clear(&s->conn_list); + g_queue_clear(&s->out_sendco.send_list); + if (s->notify_dev) { + g_queue_clear(&s->notify_sendco.send_list); + } if (s->connection_track_table) { g_hash_table_destroy(s->connection_track_table); @@ -1309,15 +1443,18 @@ static void colo_compare_finalize(Object *obj) object_unref(OBJECT(s->iothread)); } - qemu_mutex_destroy(&event_mtx); - qemu_cond_destroy(&event_complete_cond); - g_free(s->pri_indev); g_free(s->sec_indev); g_free(s->outdev); g_free(s->notify_dev); } +static void __attribute__((__constructor__)) colo_compare_init_globals(void) +{ + colo_compare_active = false; + qemu_mutex_init(&colo_compare_mutex); +} + static const TypeInfo colo_compare_info = { .name = TYPE_COLO_COMPARE, .parent = TYPE_OBJECT, diff --git a/net/colo.c b/net/colo.c index 8196b35837..a6c66d829a 100644 --- a/net/colo.c +++ b/net/colo.c @@ -185,6 +185,13 @@ void packet_destroy(void *opaque, void *user_data) g_slice_free(Packet, pkt); } +void packet_destroy_partial(void *opaque, void *user_data) +{ + Packet *pkt = opaque; + + g_slice_free(Packet, pkt); +} + /* * Clear hashtable, stop this hash growing really huge */ diff --git a/net/colo.h b/net/colo.h index 679314b1ca..573ab91785 100644 --- a/net/colo.h +++ b/net/colo.h @@ -102,5 +102,6 @@ bool connection_has_tracked(GHashTable *connection_track_table, void connection_hashtable_reset(GHashTable *connection_track_table); 
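With the queued, coroutine-based sender above, a primary packet that compares equal no longer needs its payload duplicated: compare_chr_send() is called with zero_copy set, the buffer pointer moves into the SendEntry (and is freed by the send coroutine), and the caller releases only the Packet wrapper via the new packet_destroy_partial(). A distilled illustration of that ownership rule, with hypothetical types rather than the real GQueue/SendEntry code:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct send_entry {
    unsigned char *buf;
    size_t size;
    struct send_entry *next;
};

/* With zero_copy the payload pointer itself is handed over, so the caller
 * must stop freeing buf and only drop whatever wrapper still points at it
 * (that is what packet_destroy_partial() is for).  Without it, the queue
 * keeps a private copy and the caller's ownership is unchanged. */
void send_enqueue(struct send_entry **head, unsigned char *buf, size_t size,
                  bool zero_copy)
{
    struct send_entry *e = malloc(sizeof(*e));

    e->size = size;
    if (zero_copy) {
        e->buf = buf;
    } else {
        e->buf = malloc(size);
        memcpy(e->buf, buf, size);
    }
    e->next = *head;
    *head = e;
}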
Packet *packet_new(const void *data, int size, int vnet_hdr_len); void packet_destroy(void *opaque, void *user_data); +void packet_destroy_partial(void *opaque, void *user_data); #endif /* NET_COLO_H */ @@ -610,7 +610,7 @@ void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge) qemu_notify_event(); } else if (purge) { /* Unable to empty the queue, purge remaining packets */ - qemu_net_queue_purge(nc->incoming_queue, nc); + qemu_net_queue_purge(nc->incoming_queue, nc->peer); } } @@ -965,17 +965,11 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( }; -static int net_client_init1(const void *object, bool is_netdev, Error **errp) +static int net_client_init1(const Netdev *netdev, bool is_netdev, Error **errp) { - Netdev legacy = {0}; - const Netdev *netdev; - const char *name; NetClientState *peer = NULL; if (is_netdev) { - netdev = object; - name = netdev->id; - if (netdev->type == NET_CLIENT_DRIVER_NIC || !net_client_init_fun[netdev->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", @@ -983,62 +977,11 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) return -1; } } else { - const NetLegacy *net = object; - const NetLegacyOptions *opts = net->opts; - legacy.id = net->id; - netdev = &legacy; - /* missing optional values have been initialized to "all bits zero" */ - name = net->has_id ? net->id : net->name; - - if (net->has_name) { - warn_report("The 'name' parameter is deprecated, use 'id' instead"); - } - - /* Map the old options to the new flat type */ - switch (opts->type) { - case NET_LEGACY_OPTIONS_TYPE_NONE: + if (netdev->type == NET_CLIENT_DRIVER_NONE) { return 0; /* nothing to do */ - case NET_LEGACY_OPTIONS_TYPE_NIC: - legacy.type = NET_CLIENT_DRIVER_NIC; - legacy.u.nic = opts->u.nic; - break; - case NET_LEGACY_OPTIONS_TYPE_USER: - legacy.type = NET_CLIENT_DRIVER_USER; - legacy.u.user = opts->u.user; - break; - case NET_LEGACY_OPTIONS_TYPE_TAP: - legacy.type = NET_CLIENT_DRIVER_TAP; - legacy.u.tap = opts->u.tap; - break; - case NET_LEGACY_OPTIONS_TYPE_L2TPV3: - legacy.type = NET_CLIENT_DRIVER_L2TPV3; - legacy.u.l2tpv3 = opts->u.l2tpv3; - break; - case NET_LEGACY_OPTIONS_TYPE_SOCKET: - legacy.type = NET_CLIENT_DRIVER_SOCKET; - legacy.u.socket = opts->u.socket; - break; - case NET_LEGACY_OPTIONS_TYPE_VDE: - legacy.type = NET_CLIENT_DRIVER_VDE; - legacy.u.vde = opts->u.vde; - break; - case NET_LEGACY_OPTIONS_TYPE_BRIDGE: - legacy.type = NET_CLIENT_DRIVER_BRIDGE; - legacy.u.bridge = opts->u.bridge; - break; - case NET_LEGACY_OPTIONS_TYPE_NETMAP: - legacy.type = NET_CLIENT_DRIVER_NETMAP; - legacy.u.netmap = opts->u.netmap; - break; - case NET_LEGACY_OPTIONS_TYPE_VHOST_USER: - legacy.type = NET_CLIENT_DRIVER_VHOST_USER; - legacy.u.vhost_user = opts->u.vhost_user; - break; - default: - abort(); } - - if (!net_client_init_fun[netdev->type]) { + if (netdev->type == NET_CLIENT_DRIVER_HUBPORT || + !net_client_init_fun[netdev->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net backend type (maybe it is not compiled " "into this binary)"); @@ -1047,12 +990,12 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) /* Do not add to a hub if it's a nic with a netdev= parameter. 
*/ if (netdev->type != NET_CLIENT_DRIVER_NIC || - !opts->u.nic.has_netdev) { + !netdev->u.nic.has_netdev) { peer = net_hub_add_port(0, NULL, NULL); } } - if (net_client_init_fun[netdev->type](netdev, name, peer, errp) < 0) { + if (net_client_init_fun[netdev->type](netdev, netdev->id, peer, errp) < 0) { /* FIXME drop when all init functions store an Error */ if (errp && !*errp) { error_setg(errp, QERR_DEVICE_INIT_FAILED, @@ -1108,7 +1051,7 @@ static void show_netdevs(void) static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp) { gchar **substrings = NULL; - void *object = NULL; + Netdev *object = NULL; Error *err = NULL; int ret = -1; Visitor *v = opts_visitor_new(opts); @@ -1151,21 +1094,19 @@ static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp) } } - if (is_netdev) { - visit_type_Netdev(v, NULL, (Netdev **)&object, &err); - } else { - visit_type_NetLegacy(v, NULL, (NetLegacy **)&object, &err); + /* Create an ID for -net if the user did not specify one */ + if (!is_netdev && !qemu_opts_id(opts)) { + static int idx; + qemu_opts_set_id(opts, g_strdup_printf("__org.qemu.net%i", idx++)); } + visit_type_Netdev(v, NULL, &object, &err); + if (!err) { ret = net_client_init1(object, is_netdev, &err); } - if (is_netdev) { - qapi_free_Netdev(object); - } else { - qapi_free_NetLegacy(object); - } + qapi_free_Netdev(object); out: error_propagate(errp, err); @@ -254,7 +254,8 @@ static void tap_set_vnet_hdr_len(NetClientState *nc, int len) assert(nc->info->type == NET_CLIENT_DRIVER_TAP); assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || - len == sizeof(struct virtio_net_hdr)); + len == sizeof(struct virtio_net_hdr) || + len == sizeof(struct virtio_net_hdr_v1_hash)); tap_fd_set_vnet_hdr_len(s->fd, len); s->host_vnet_hdr_len = len; diff --git a/net/trace-events b/net/trace-events index 02c13fd0ba..fa49c71533 100644 --- a/net/trace-events +++ b/net/trace-events @@ -12,6 +12,7 @@ colo_proxy_main(const char *chr) ": %s" # colo-compare.c colo_compare_main(const char *chr) ": %s" +colo_compare_drop_packet(const char *queue, const char *chr) ": %s: %s" colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" diff --git a/qapi/net.json b/qapi/net.json index cebb1b52e3..9244c9af56 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -468,55 +468,6 @@ 'vhost-user': 'NetdevVhostUserOptions' } } ## -# @NetLegacy: -# -# Captures the configuration of a network device; legacy. 
-# -# @id: identifier for monitor commands -# -# @name: identifier for monitor commands, ignored if @id is present -# -# @opts: device type specific properties (legacy) -# -# Since: 1.2 -## -{ 'struct': 'NetLegacy', - 'data': { - '*id': 'str', - '*name': 'str', - 'opts': 'NetLegacyOptions' } } - -## -# @NetLegacyOptionsType: -# -# Since: 1.2 -## -{ 'enum': 'NetLegacyOptionsType', - 'data': ['none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', - 'bridge', 'netmap', 'vhost-user'] } - -## -# @NetLegacyOptions: -# -# Like Netdev, but for use only by the legacy command line options -# -# Since: 1.2 -## -{ 'union': 'NetLegacyOptions', - 'base': { 'type': 'NetLegacyOptionsType' }, - 'discriminator': 'type', - 'data': { - 'nic': 'NetLegacyNicOptions', - 'user': 'NetdevUserOptions', - 'tap': 'NetdevTapOptions', - 'l2tpv3': 'NetdevL2TPv3Options', - 'socket': 'NetdevSocketOptions', - 'vde': 'NetdevVdeOptions', - 'bridge': 'NetdevBridgeOptions', - 'netmap': 'NetdevNetmapOptions', - 'vhost-user': 'NetdevVhostUserOptions' } } - -## # @NetFilterDirection: # # Indicates whether a netfilter is attached to a netdev's transmit queue or diff --git a/qom/qom-hmp-cmds.c b/qom/qom-hmp-cmds.c index 99385b6ad2..b0abe84cb1 100644 --- a/qom/qom-hmp-cmds.c +++ b/qom/qom-hmp-cmds.c @@ -44,15 +44,27 @@ void hmp_qom_list(Monitor *mon, const QDict *qdict) void hmp_qom_set(Monitor *mon, const QDict *qdict) { + const bool json = qdict_get_try_bool(qdict, "json", false); const char *path = qdict_get_str(qdict, "path"); const char *property = qdict_get_str(qdict, "property"); const char *value = qdict_get_str(qdict, "value"); Error *err = NULL; - QObject *obj; - obj = qobject_from_json(value, &err); - if (err == NULL) { - qmp_qom_set(path, property, obj, &err); + if (!json) { + Object *obj = object_resolve_path(path, NULL); + + if (!obj) { + error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", path); + } else { + object_property_parse(obj, value, property, &err); + } + } else { + QObject *obj = qobject_from_json(value, &err); + + if (!err) { + qmp_qom_set(path, property, obj, &err); + } } hmp_handle_error(mon, err); @@ -71,6 +83,7 @@ void hmp_qom_get(Monitor *mon, const QDict *qdict) qobject_unref(str); } + qobject_unref(obj); hmp_handle_error(mon, err); } diff --git a/tests/migration/stress.c b/tests/migration/stress.c index 0c23964693..a062ef6b55 100644 --- a/tests/migration/stress.c +++ b/tests/migration/stress.c @@ -167,29 +167,17 @@ static unsigned long long now(void) return (tv.tv_sec * 1000ull) + (tv.tv_usec / 1000ull); } -static int stressone(unsigned long long ramsizeMB) +static void stressone(unsigned long long ramsizeMB) { size_t pagesPerMB = 1024 * 1024 / PAGE_SIZE; - char *ram = malloc(ramsizeMB * 1024 * 1024); + g_autofree char *ram = g_malloc(ramsizeMB * 1024 * 1024); char *ramptr; size_t i, j, k; - char *data = malloc(PAGE_SIZE); + g_autofree char *data = g_malloc(PAGE_SIZE); char *dataptr; size_t nMB = 0; unsigned long long before, after; - if (!ram) { - fprintf(stderr, "%s (%05d): ERROR: cannot allocate %llu MB of RAM: %s\n", - argv0, gettid(), ramsizeMB, strerror(errno)); - return -1; - } - if (!data) { - fprintf(stderr, "%s (%d): ERROR: cannot allocate %d bytes of RAM: %s\n", - argv0, gettid(), PAGE_SIZE, strerror(errno)); - free(ram); - return -1; - } - /* We don't care about initial state, but we do want * to fault it all into RAM, otherwise the first iter * of the loop below will be quite slow. 
We can't use @@ -198,9 +186,7 @@ static int stressone(unsigned long long ramsizeMB) memset(ram, 0xfe, ramsizeMB * 1024 * 1024); if (random_bytes(data, PAGE_SIZE) < 0) { - free(ram); - free(data); - return -1; + return; } before = now(); @@ -227,9 +213,6 @@ static int stressone(unsigned long long ramsizeMB) } } } - - free(data); - free(ram); } @@ -242,7 +225,7 @@ static void *stressthread(void *arg) return NULL; } -static int stress(unsigned long long ramsizeGB, int ncpus) +static void stress(unsigned long long ramsizeGB, int ncpus) { size_t i; unsigned long long ramsizeMB = ramsizeGB * 1024 / ncpus; @@ -255,8 +238,6 @@ static int stress(unsigned long long ramsizeGB, int ncpus) } stressone(ramsizeMB); - - return 0; } @@ -352,8 +333,7 @@ int main(int argc, char **argv) fprintf(stdout, "%s (%05d): INFO: RAM %llu GiB across %d CPUs\n", argv0, gettid(), ramsizeGB, ncpus); - if (stress(ramsizeGB, ncpus) < 0) - exit_failure(); + stress(ramsizeGB, ncpus); - exit_success(); + exit_failure(); } diff --git a/tests/plugin/Makefile b/tests/plugin/Makefile index b3250e2504..3a50451428 100644 --- a/tests/plugin/Makefile +++ b/tests/plugin/Makefile @@ -17,7 +17,7 @@ NAMES += lockstep SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES))) -QEMU_CFLAGS += -fPIC +QEMU_CFLAGS += -fPIC -Wpsabi QEMU_CFLAGS += -I$(SRC_PATH)/include/qemu all: $(SONAMES) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 601c756117..b843f88a66 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -277,6 +277,8 @@ class TestSingleBlockdev(TestSingleDrive): result = self.vm.run_job('job0') self.assertEqual(result, 'Source and target image have different sizes') + # qed does not support shrinking + @iotests.skip_for_formats(('qed')) def test_small_target(self): self.do_test_target_size(self.image_len // 2) diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 index adc8a848b5..2350929fd8 100755 --- a/tests/qemu-iotests/118 +++ b/tests/qemu-iotests/118 @@ -683,11 +683,10 @@ class TestBlockJobsAfterCycle(ChangeBaseClass): except OSError: pass + # We need backing file support + @iotests.skip_for_formats(('vpc', 'parallels', 'qcow', 'vdi', 'vmdk', 'raw', + 'vhdx')) def test_snapshot_and_commit(self): - # We need backing file support - if iotests.imgfmt != 'qcow2' and iotests.imgfmt != 'qed': - return - result = self.vm.qmp('blockdev-snapshot-sync', device='drive0', snapshot_file=new_img, format=iotests.imgfmt) diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 7abbe82427..e782c5957e 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -24,11 +24,11 @@ Testing: dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -54,11 +54,11 @@ Testing: -fda TEST_DIR/t.qcow2 dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false 
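
[Illustrative aside on the tests/migration/stress.c conversion shown earlier, not part of the patch.] g_malloc() aborts on allocation failure, so the manual NULL checks could be dropped, and g_autofree releases the buffers automatically on every return path, which is why the explicit free() calls disappear. A minimal sketch with a hypothetical fill_buffer() helper:

#include <glib.h>
#include <string.h>

static void fill_buffer(size_t len)
{
    /* freed automatically when buf goes out of scope, on every return path */
    g_autofree char *buf = g_malloc(len);   /* aborts instead of returning NULL */

    memset(buf, 0xfe, len);
    if (len < 16) {
        return;                             /* no manual g_free() needed here */
    }
    memset(buf, 0x00, 16);
}
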
drive-type = "144" @@ -81,22 +81,22 @@ Testing: -fdb TEST_DIR/t.qcow2 dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -119,22 +119,22 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -160,11 +160,11 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -187,22 +187,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -225,22 +225,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - 
physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -266,11 +266,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -293,11 +293,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -320,22 +320,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -361,11 +361,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -388,11 +388,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - 
physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -415,22 +415,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -456,22 +456,22 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -494,22 +494,22 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -532,11 +532,11 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - 
discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -559,11 +559,11 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -589,22 +589,22 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -627,22 +627,22 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -665,22 +665,22 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + 
physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -703,22 +703,22 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -750,22 +750,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -788,22 +788,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -832,22 +832,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + 
discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -870,22 +870,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -908,22 +908,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 0 (0x0) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -946,22 +946,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 0 (0x0) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -999,11 +999,11 @@ Testing: -device floppy dev: floppy, id "" unit = 0 (0x0) drive = "" - 
logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -1026,11 +1026,11 @@ Testing: -device floppy,drive-type=120 dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "120" @@ -1053,11 +1053,11 @@ Testing: -device floppy,drive-type=144 dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -1080,11 +1080,11 @@ Testing: -device floppy,drive-type=288 dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -1110,11 +1110,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "120" @@ -1137,11 +1137,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -1167,11 +1167,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -1194,17 +1194,17 @@ Testing: -drive 
if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 -QEMU_PROG: -device floppy,drive=none0,logical_block_size=4096: Physical and logical block size must be 512 for floppy +QEMU_PROG: -device floppy,drive=none0,logical_block_size=4096: logical_block_size > physical_block_size not supported Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=1024 QEMU_PROG: -device floppy,drive=none0,physical_block_size=1024: Physical and logical block size must be 512 for floppy diff --git a/tests/qemu-iotests/190 b/tests/qemu-iotests/190 index fe630918e9..c22d8d64f9 100755 --- a/tests/qemu-iotests/190 +++ b/tests/qemu-iotests/190 @@ -41,6 +41,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 # See 178 for more extensive tests across more formats _supported_fmt qcow2 _supported_proto file +# compat=0.10 does not support bitmaps +_unsupported_imgopts 'compat=0.10' echo "== Huge file without bitmaps ==" echo diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 index 99acb55ebb..89a5359f32 100755 --- a/tests/qemu-iotests/229 +++ b/tests/qemu-iotests/229 @@ -46,6 +46,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 qed _supported_proto file _supported_os Linux +# blkdebug can only inject errors on bs->file, so external data files +# do not work with this test +_unsupported_imgopts data_file DEST_IMG="$TEST_DIR/d.$IMGFMT" diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291 index 404f8521f7..28e4fb9b4d 100755 --- a/tests/qemu-iotests/291 +++ b/tests/qemu-iotests/291 @@ -39,6 +39,8 @@ _supported_fmt qcow2 _supported_proto file _supported_os Linux _require_command QEMU_NBD +# compat=0.10 does not support bitmaps +_unsupported_imgopts 'compat=0.10' echo echo "=== Initial image setup ===" diff --git a/tests/qemu-iotests/292 b/tests/qemu-iotests/292 index a2de27cca4..83ab19231d 100755 --- a/tests/qemu-iotests/292 +++ b/tests/qemu-iotests/292 @@ -40,6 +40,11 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file _supported_os Linux +# We need qemu-img map to show the file where the data is allocated, +# but with an external data file, it will show that instead of the +# file we want to check. So just skip this test for external data +# files. 
+_unsupported_imgopts data_file echo '### Create the backing image' BACKING_IMG="$TEST_IMG.base" diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index f20d90f969..5ea4c4df8b 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -1103,6 +1103,22 @@ def skip_if_unsupported(required_formats=(), read_only=False): return func_wrapper return skip_test_decorator +def skip_for_formats(formats: Sequence[str] = ()) \ + -> Callable[[Callable[[QMPTestCase, List[Any], Dict[str, Any]], None]], + Callable[[QMPTestCase, List[Any], Dict[str, Any]], None]]: + '''Skip Test Decorator + Skips the test for the given formats''' + def skip_test_decorator(func): + def func_wrapper(test_case: QMPTestCase, *args: List[Any], + **kwargs: Dict[str, Any]) -> None: + if imgfmt in formats: + msg = f'{test_case}: Skipped for format {imgfmt}' + test_case.case_skip(msg) + else: + func(test_case, *args, **kwargs) + return func_wrapper + return skip_test_decorator + def skip_if_user_is_root(func): '''Skip Test Decorator Runs the test only without root permissions''' diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py index 8c187e9a72..0910e6ac07 100755 --- a/tests/qemu-iotests/qcow2.py +++ b/tests/qemu-iotests/qcow2.py @@ -2,6 +2,8 @@ # # Manipulations with qcow2 image # +# Copyright (C) 2012 Red Hat, Inc. +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or diff --git a/tests/qemu-iotests/qcow2_format.py b/tests/qemu-iotests/qcow2_format.py index 0f65fd161d..cc432e7ae0 100644 --- a/tests/qemu-iotests/qcow2_format.py +++ b/tests/qemu-iotests/qcow2_format.py @@ -1,6 +1,7 @@ # Library for manipulations with qcow2 image # # Copyright (c) 2020 Virtuozzo International GmbH. +# Copyright (C) 2012 Red Hat, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c index bd9e7b083c..3b1522acdd 100644 --- a/tools/virtiofsd/seccomp.c +++ b/tools/virtiofsd/seccomp.c @@ -42,6 +42,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(exit_group), SCMP_SYS(fallocate), SCMP_SYS(fchdir), + SCMP_SYS(fchmod), SCMP_SYS(fchmodat), SCMP_SYS(fchownat), SCMP_SYS(fcntl), |
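
[Illustrative aside on the tools/virtiofsd/seccomp.c hunk above, not virtiofsd's actual setup code.] Each SCMP_SYS() entry in the whitelist (here, the newly added fchmod) becomes an allow rule in a libseccomp filter that is then loaded into the kernel; anything not listed triggers the default action. A generic, self-contained sketch of that mechanism:

#include <seccomp.h>
#include <stdlib.h>

static void demo_install_allowlist(void)
{
    /* default action: kill on any syscall not explicitly allowed below */
    scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_KILL);
    const int allowed[] = {
        SCMP_SYS(fchmod),       /* the syscall added by the patch above */
        SCMP_SYS(fchmodat),
        SCMP_SYS(write),
        SCMP_SYS(exit_group),
    };

    if (!ctx) {
        abort();
    }
    for (size_t i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++) {
        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, allowed[i], 0) < 0) {
            abort();
        }
    }
    if (seccomp_load(ctx) < 0) {    /* install the filter for this process */
        abort();
    }
    seccomp_release(ctx);           /* kernel keeps the filter; free userspace state */
}
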