@@ -0,0 +1,35 @@
- migration.migration_with_disk.migration_with_vhostuser:
type = migration_with_vhostuser
start_vm = "no"
migration_setup = "yes"
storage_type = "nfs"
setup_local_nfs = "yes"
disk_type = "file"
disk_source_protocol = "netfs"
mnt_path_name = ${nfs_mount_dir}
# Console output can only be monitored via virsh console output
only_pty = True
take_regular_screendumps = no
# Extra options to pass after <domain> <desturi>
virsh_migrate_extra = ""
    # SSH connection timeout
ssh_timeout = 60
virsh_migrate_connect_uri = "qemu:///system"
virsh_migrate_dest_state = "running"
virsh_migrate_src_state = "shut off"
server_ip = "${migrate_dest_host}"
server_user = "root"
server_pwd = "${migrate_dest_pwd}"
    client_ip = "${migrate_source_host}"
client_pwd = "${migrate_source_pwd}"
migrate_desturi_port = "22"
migrate_desturi_type = "ssh"
virsh_migrate_desturi = "qemu+ssh://${migrate_dest_host}/system"
func_supported_since_libvirt_ver = (7, 0, 0)
vm_attrs = {"mb": {"source_type":"memfd", "access_mode": "shared"}}
source_file = "/tmp/vhost.sock"
queues = 1
disk_dict = {"type_name": "vhostuser", "device": "disk", "driver": {"name": "qemu", "type": "raw", "queues": ${queues}}, "source": {"attrs": {"type": "unix", "path": "${source_file}"}}, "target": {"dev": "vdb", "bus": "virtio"}}
no ppc64le
variants:
- with_precopy:
libvirt/tests/src/migration/migration_with_disk/migration_with_vhostuser.py
@@ -0,0 +1,197 @@
import os
import ast

from avocado.utils import process

from virttest import libvirt_version
from virttest import remote
from virttest import virsh
from virttest import utils_disk
from virttest import data_dir

from virttest.libvirt_xml import vm_xml
from virttest.utils_libvirt import libvirt_vmxml
from virttest.utils_test import libvirt

from provider.migration import base_steps


def start_vhost_sock_service_in_source(start_sock_service_cmd, image_path, sock_path):
"""
Start one vhost sock service in source host.

:param start_sock_service_cmd: command to start vhost service
:param image_path: image file path
:param sock_path: sock file path
"""
# Create backend image in source host
libvirt.create_local_disk("file", image_path, size="100M")
chown_cmd = "chown qemu:qemu %s" % image_path
process.run(chown_cmd, ignore_status=False, shell=True)
# Start vhost sock service in source host
    process.run(start_sock_service_cmd, ignore_status=False, shell=True)
# Set SELinux context in source host
ch_seccontext_cmd = "chcon -t svirt_image_t %s" % sock_path
process.run(ch_seccontext_cmd, ignore_status=False, shell=True)
set_bool_mmap_cmd = "setsebool domain_can_mmap_files 1"
process.run(set_bool_mmap_cmd, ignore_status=False, shell=True)


def start_vhost_sock_service_in_remote(start_sock_service_cmd, image_path, sock_path, params):
"""
Prepare and start one vhost sock service in remote host.

:param start_sock_service_cmd: command to start vhost service
:param image_path: image file path
:param sock_path: sock file path
:param params: test parameters
"""
remote.run_remote_cmd(f"mkdir -p {os.path.dirname(image_path)}", params, ignore_status=True)
# Create backend image in remote host
remote_create_cmd = f"dd if=/dev/zero of={image_path} bs=1M count=100 && chown qemu:qemu {image_path}"
remote.run_remote_cmd(remote_create_cmd, params, ignore_status=False)
# Start vhost sock service in remote host
remote.run_remote_cmd(start_sock_service_cmd, params, ignore_status=False)
# Set SELinux context in remote host
remote_selinux_cmd = f"chcon -t svirt_image_t {sock_path} && setsebool domain_can_mmap_files 1"
remote.run_remote_cmd(remote_selinux_cmd, params, ignore_status=False)


def run(test, params, env):
"""
Test vhostuser disk migration.

    1. Prepare vhostuser disk and start the domain.
    2. Perform migration operation.
    3. Verify vhostuser disk after migration.
"""

def setup_test():
"""
Setup steps before migration
"""
nonlocal image_path, sock_path

test.log.info("Setup steps for vhostuser disk migration.")

sock_path = params.get("source_file", "/tmp/vhost.sock")
image_path = data_dir.get_data_dir() + '/test.img'
disk_dict = ast.literal_eval(params.get("disk_dict", "{}"))
vm_attrs = ast.literal_eval(params.get("vm_attrs", "{}"))

# Define start_sock_service_cmd
start_sock_service_cmd = (
'systemd-run --uid qemu --gid qemu /usr/bin/qemu-storage-daemon'
' --blockdev \'{"driver":"file","filename":"%s","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}\''
' --blockdev \'{"node-name":"libvirt-1-format","read-only":false,"driver":"raw","file":"libvirt-1-storage"}\''
' --export vhost-user-blk,id=vhost-user-blk0,node-name=libvirt-1-format,addr.type=unix,addr.path=%s,writable=on'
' --chardev stdio,mux=on,id=char0; sleep 3'
% (image_path, sock_path))
Comment on lines +83 to +89
@coderabbitai coderabbitai bot Nov 28, 2025

⚠️ Potential issue | 🟠 Major

Tighten qemu‑storage‑daemon lifecycle management (systemd‑run output + killall)

Two related points here:

  1. Service ID from systemd-run
    start_vhost_sock_service_in_source / _in_remote capture the raw stdout of systemd-run as vsock_service_id / remote_vsock_service_id, then later call systemctl stop {id}. systemd-run typically prints a sentence like Running as unit: foo.service, not just the unit name, so passing that whole string to systemctl stop is fragile and likely a no‑op. You either need to:

    • Specify a known --unit=<name> in the systemd-run command and use that name directly for systemctl stop, or
    • Parse the unit name out of stdout before storing it.
  2. Global killall qemu-storage-daemon on both hosts
    The pidof qemu-storage-daemon && killall qemu-storage-daemon cleanup on source and remote will terminate all qemu-storage-daemon instances, not just the one this test started. That can interfere with other tests or any legitimate services using qemu‑storage‑daemon on the same hosts.

It would be safer to manage just the service(s) you start (via explicit --unit names or PIDs) and avoid broad killall unless you know the environment is completely dedicated.

Also applies to: 95-98, 166-170, 173-178
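
As an illustration of the second bullet (parsing the unit name rather than passing the raw output to systemctl stop), a minimal sketch, assuming systemd-run announces the transient unit as "Running as unit: <name>" (the exact wording, and whether it lands on stdout or stderr, varies by systemd version):

import re

from avocado.utils import process


def start_and_get_unit(start_sock_service_cmd):
    """Run the systemd-run command and return the transient unit name, if found."""
    result = process.run(start_sock_service_cmd, ignore_status=False, shell=True)
    # Newer systemd prints the notice on stderr, older versions on stdout,
    # so search both streams.
    combined = result.stdout_text + result.stderr_text
    match = re.search(r"Running as unit:\s*(\S+\.service)", combined)
    return match.group(1) if match else None

Cleanup would then call systemctl stop on exactly that unit (process.run("systemctl stop %s" % unit_name, ignore_status=True, shell=True)) instead of a host-wide killall.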

🤖 Prompt for AI Agents
libvirt/tests/src/migration/migration_with_disk/migration_with_vhostuser.py
around lines 86-92 (and similarly 95-98, 166-170, 173-178): the test starts
qemu-storage-daemon with systemd-run and later stops it by passing the raw
systemd-run stdout to systemctl and by running global killall; change the start
command to include an explicit --unit=<unique_name> (or capture and parse the
unit name from systemd-run stdout) and store that unit name so subsequent
systemctl stop uses only that unit, and stop using killall qemu-storage-daemon —
instead record the daemon PID(s) you start (or rely on the named systemd unit)
and only kill/stop those specific PIDs/units during cleanup to avoid terminating
other processes.

Contributor Author

I've adjusted this part, please check.


🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check the current state of the vhostuser migration test file to see the adjustments
cat libvirt/tests/src/migration/migration_with_disk/migration_with_vhostuser.py

Length of output: 7424


@meinaLi, I appreciate the effort, but the core issue isn't fully resolved yet. While you removed the broken systemctl stop logic (which is good), the cleanup still uses pkill -f qemu-storage-daemon, which has the same problem as the original killall qemu-storage-daemon.

The problem: Both killall and pkill -f qemu-storage-daemon will terminate all qemu-storage-daemon processes on the host—not just the ones your test started. This can break other concurrent tests or legitimate services.

Recommended solution (choose one):

Option 1: Use explicit systemd unit names (cleanest approach)

# In the command construction (line ~86):
unit_name = f"vhost-test-{vm_name}-{int(time.time())}"  # unique unit name
start_sock_service_cmd = (
    f'systemd-run --unit={unit_name} --uid qemu --gid qemu /usr/bin/qemu-storage-daemon'
    ' --blockdev \'{"driver":"file","filename":"%s","node-name":"libvirt-1-storage",...}\''
    ...
)

# Store the unit_name in setup_test (return from start_vhost_* functions)
# Then in cleanup_test:
process.run(f"systemctl stop {unit_name}", ignore_status=True, shell=True)
remote.run_remote_cmd(f"systemctl stop {remote_unit_name}", params, ignore_status=True)

Option 2: Track PIDs
Capture the PID from the systemd-run output or the qemu-storage-daemon process itself, then kill only that specific PID during cleanup.
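
A minimal sketch of this PID-tracking route, assuming the socket path passed to --export is unique to this test so the pgrep pattern matches only the daemon it started (hypothetical helper, not part of the current patch):

from avocado.utils import process
from virttest import remote


def storage_daemon_pid(sock_path, params=None):
    """Return the PID of the qemu-storage-daemon exporting sock_path, or None."""
    cmd = "pgrep -f 'qemu-storage-daemon.*%s'" % sock_path
    if params:
        # Remote host: reuse the test's remote command helper.
        result = remote.run_remote_cmd(cmd, params, ignore_status=True)
    else:
        result = process.run(cmd, ignore_status=True, shell=True)
    pids = result.stdout_text.strip().splitlines()
    return pids[0] if pids else None

cleanup_test would then kill only that PID (locally via process.run, remotely via remote.run_remote_cmd) instead of every qemu-storage-daemon on the host.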

Would you like me to provide a more detailed code example for either approach?


🧠 Learnings used
Learnt from: hholoubk
Repo: autotest/tp-libvirt PR: 6579
File: libvirt/tests/src/sriov/vIOMMU/iommu_device_lifecycle.py:95-97
Timestamp: 2025-09-24T08:01:27.899Z
Learning: In the libvirt test framework used in tp-libvirt, VM cleanup including destroying running VMs is handled by the teardown method (test_obj.teardown_iommu_test()) called in the finally block, so explicit VM destroy calls on timeout are not necessary according to the maintainers.


# Start vhost service in source host
start_vhost_sock_service_in_source(start_sock_service_cmd, image_path, sock_path)
# Start vhost service in remote host
start_vhost_sock_service_in_remote(start_sock_service_cmd, image_path, sock_path, params)
# Setup migration connection
migration_obj.setup_connection()
# Prepare the VM with memory backing and vhostuser disk.
vmxml = vm_xml.VMXML.new_from_dumpxml(vm_name)
vmxml.setup_attrs(**vm_attrs)
disk_obj = libvirt_vmxml.create_vm_device_by_type("disk", disk_dict)
test.log.debug("vhostuser disk xml is:\n%s" % disk_obj)
vmxml.add_device(disk_obj)
vmxml.sync()
base_steps.sync_cpu_for_mig(params)
vm.start()
vm.wait_for_login().close()

        # Check that the vhostuser disk is present in the live domain XML
if "vhostuser" not in virsh.dumpxml(vm_name).stdout_text:
test.fail("Check vhostuser disk in VM failed")

test.log.info("Setup completed successfully.")

def verify_test():
"""
Verify steps after migration

"""
test.log.info("Verify steps after vhostuser disk migration.")

device_target = params.get("target_dev", "vdb")
desturi = params.get("virsh_migrate_desturi")

# Switch to destination host
backup_uri, vm.connect_uri = vm.connect_uri, desturi
vm.cleanup_serial_console()
vm.create_serial_console()
vm_session = vm.wait_for_serial_login(timeout=120)

try:
# Verify vhostuser disk is still accessible after migration
output = vm_session.cmd_output("lsblk")
test.log.debug("lsblk output after migration: %s", output)
if device_target not in output:
test.fail(f'Vhostuser disk device {device_target} not found in VM after migration')
# Write data to the disk to ensure it's working
utils_disk.dd_data_to_vm_disk(vm_session, "/dev/%s" % device_target)
test.log.info(f"Vhostuser disk {device_target} is accessible after migration")

finally:
vm_session.close()

# Restore original connection URI
vm.connect_uri = backup_uri

# Run default migration verification
migration_obj.verify_default()

test.log.info("Verification completed successfully.")

def cleanup_test():
"""
Cleanup steps for cases

"""
test.log.info("Cleanup steps for vhostuser disk migration.")
if vm.is_alive():
vm.destroy(gracefully=False)
vmxml_backup.sync()

migration_obj.cleanup_connection()

# Cleanup on remote host
remote.run_remote_cmd("pkill -f qemu-storage-daemon", params, ignore_status=True)
remote.run_remote_cmd(f"rm -rf {sock_path} {image_path}", params, ignore_status=True)

        # Kill qemu-storage-daemon processes on the local host
process.run("pkill -f qemu-storage-daemon", ignore_status=True, shell=True)

# Clean up images
for file_path in [image_path, sock_path]:
            if file_path and os.path.exists(file_path):
os.remove(file_path)

test.log.info("Cleanup completed successfully.")

libvirt_version.is_libvirt_feature_supported(params)
vm_name = params.get("migrate_main_vm")
vm = env.get_vm(vm_name)

# Initialize variables
image_path = None
sock_path = None

# Back up xml file.
vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)

# Migration object
migration_obj = base_steps.MigrationBase(test, vm, params)

try:
setup_test()
migration_obj.run_migration()
verify_test()

finally:
cleanup_test()