VFIO migration is not compatible with postcopy migration. A VFIO device
on the destination can't handle page faults for pages that have not been
sent yet.
Attempting such a migration causes the VM to crash on the destination:
qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address
qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000,
0x7f1b11a00000) = -14 (Bad address)
qemu: hardware error: vfio: DMA mapping failed, unable to continue
To prevent this, and to be explicit about supported features, block the
combination of VFIO migration and postcopy migration: fail setting the
postcopy capability if a VFIO device is present, and add a migration
blocker if a VFIO device is added while the postcopy capability is on.
Reported-by: Yanghang Liu <yanghliu@redhat.com>
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
include/hw/vfio/vfio-common.h | 2 ++
migration/migration.h | 2 ++
hw/vfio/common.c | 43 +++++++++++++++++++++++++++++++++++
hw/vfio/migration.c | 6 +++++
migration/options.c | 19 ++++++++++++++++
migration/target.c | 19 ++++++++++++++++
6 files changed, 91 insertions(+)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e9b8954595..c0b58f2bb7 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -227,6 +227,8 @@ extern VFIOGroupList vfio_group_list;
bool vfio_mig_active(void);
int vfio_block_multiple_devices_migration(VFIODevice *vbasedev,
Error **errp);
void vfio_unblock_multiple_devices_migration(void);
+int vfio_block_postcopy_migration(VFIODevice *vbasedev, Error **errp);
+void vfio_unblock_postcopy_migration(void);
bool vfio_viommu_preset(VFIODevice *vbasedev);
int64_t vfio_mig_bytes_transferred(void);
void vfio_reset_bytes_transferred(void);
diff --git a/migration/migration.h b/migration/migration.h
index c5695de214..21a6423408 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -514,6 +514,8 @@ void migration_cancel(const Error *error);
void migration_populate_vfio_info(MigrationInfo *info);
void migration_reset_vfio_bytes_transferred(void);
+bool migration_vfio_mig_active(void);
+void migration_vfio_unblock_postcopy_migration(void);
void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
#endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 373f6e5932..7461194b2b 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -40,6 +40,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
+#include "migration/options.h"
#include "migration/misc.h"
#include "migration/blocker.h"
#include "migration/qemu-file.h"
@@ -343,6 +344,7 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
uint64_t size, ram_addr_t ram_addr);
static Error *multiple_devices_migration_blocker;
+static Error *postcopy_migration_blocker;
static unsigned int vfio_migratable_devices_num(void)
{
@@ -427,6 +429,47 @@ void vfio_unblock_multiple_devices_migration(void)
multiple_devices_migration_blocker = NULL;
}
+int vfio_block_postcopy_migration(VFIODevice *vbasedev, Error **errp)
+{
+ int ret;
+
+ if (!migrate_postcopy_ram()) {
+ return 0;
+ }
+
+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
+ error_setg(errp,
+ "VFIO migration is not compatible with postcopy migration");
+ return -EINVAL;
+ }
+
+ if (postcopy_migration_blocker) {
+ return 0;
+ }
+
+ error_setg(&postcopy_migration_blocker,
+ "VFIO migration is not compatible with postcopy migration");
+ ret = migrate_add_blocker(postcopy_migration_blocker, errp);
+ if (ret < 0) {
+ error_free(postcopy_migration_blocker);
+ postcopy_migration_blocker = NULL;
+ }
+
+ return ret;
+}
+
+void vfio_unblock_postcopy_migration(void)
+{
+ if (!postcopy_migration_blocker ||
+ (vfio_migratable_devices_num() && migrate_postcopy_ram())) {
+ return;
+ }
+
+ migrate_del_blocker(postcopy_migration_blocker);
+ error_free(postcopy_migration_blocker);
+ postcopy_migration_blocker = NULL;
+}
+
bool vfio_mig_active(void)
{
return vfio_migratable_devices_num();
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 71855468fe..76406e9ae9 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -856,6 +856,7 @@ static void vfio_migration_deinit(VFIODevice *vbasedev)
unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
vfio_migration_free(vbasedev);
vfio_unblock_multiple_devices_migration();
+ vfio_unblock_postcopy_migration();
}
static int vfio_block_migration(VFIODevice *vbasedev, Error *err,
Error **errp)
@@ -939,6 +940,11 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
goto out_deinit;
}
+ ret = vfio_block_postcopy_migration(vbasedev, errp);
+ if (ret) {
+ goto out_deinit;
+ }
+
if (vfio_viommu_preset(vbasedev)) {
error_setg(&err, "%s: Migration is currently not supported "
"with vIOMMU enabled", vbasedev->name);
diff --git a/migration/options.c b/migration/options.c
index 1d1e1321b0..e201053563 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -499,6 +499,11 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
error_setg(errp, "Postcopy is not yet compatible with multifd");
return false;
}
+
+ if (migration_vfio_mig_active()) {
+ error_setg(errp, "Postcopy is not compatible with VFIO migration");
+ return false;
+ }
}
if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
@@ -612,6 +617,16 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
return true;
}
+/*
+ * Devices might have added migration blockers based on migration capabilities
+ * values when those devices were added. Remove such blockers according to new
+ * changes in migration capabilities.
+ */
+static void migration_caps_remove_blockers(void)
+{
+ migration_vfio_unblock_postcopy_migration();
+}
+
bool migrate_cap_set(int cap, bool value, Error **errp)
{
MigrationState *s = migrate_get_current();
@@ -629,6 +644,8 @@ bool migrate_cap_set(int cap, bool value, Error **errp)
return false;
}
s->capabilities[cap] = value;
+ migration_caps_remove_blockers();
+
return true;
}
@@ -678,6 +695,8 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
for (cap = params; cap; cap = cap->next) {
s->capabilities[cap->value->capability] = cap->value->state;
}
+
+ migration_caps_remove_blockers();
}
/* parameters */
diff --git a/migration/target.c b/migration/target.c
index a6ffa9a5ce..690ecb4dd5 100644
--- a/migration/target.c
+++ b/migration/target.c
@@ -27,6 +27,16 @@ void migration_reset_vfio_bytes_transferred(void)
{
vfio_reset_bytes_transferred();
}
+
+bool migration_vfio_mig_active(void)
+{
+ return vfio_mig_active();
+}
+
+void migration_vfio_unblock_postcopy_migration(void)
+{
+ vfio_unblock_postcopy_migration();
+}
#else
void migration_populate_vfio_info(MigrationInfo *info)
{
@@ -35,4 +45,13 @@ void migration_populate_vfio_info(MigrationInfo *info)
void migration_reset_vfio_bytes_transferred(void)
{
}
+
+bool migration_vfio_mig_active(void)
+{
+ return false;
+}
+
+void migration_vfio_unblock_postcopy_migration()