[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministi
From: |
Anthony Liguori |
Subject: |
Re: [Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministic |
Date: |
Mon, 07 Jan 2013 13:55:54 -0600 |
User-agent: |
Notmuch/0.13.2+93~ged93d79 (http://notmuchmail.org) Emacs/23.3.1 (x86_64-pc-linux-gnu) |
Peter Feiner <address@hidden> writes:
> This patch makes the -mem-path filenames deterministic and allows some control
> over how QEMU mmaps the files. Given this control, QEMU can be used to
> implement
> exogenous memory management techniques quite simply. Two examples follow:
>
> 1. Post-copy migration (mmap=shared for origin, save device state, path in
> NFS available on both hosts, mmap=shared for destination).
> 2. memory sharing via VM cloning (mmap=shared for master, save device
> state,
> then mmap=private for clones).
>
> These new features are exposed via arguments to -mem-path:
>
> -mem-path \
> path[,mode=temp|open|create][,mmap=private|shared][,nofallback][,anyfs]
>
> The backing file names are made deterministic by including their RAMBlocks'
> offsets, which are unique given a qemu command line. Note that Xen's live
> migration relies on RAMBlocks having the same offsets between QEMU processes
> (see xen_read_physmap). The new file name format is
>
> qemu_back_mem.OFFSET+SIZE.NAME[.RANDOM]
>
> where SIZE and NAME are added for extra sanity checking when mode="open".
>
> By default, the old -mem-path behavior is preserved. I.e., mode="temp" is
> used,
> which adds a random suffix to the deterministic name and unlinks the files.
>
> Signed-off-by: Peter Feiner <address@hidden>
This is not reasonable IMHO.
I was okay with sticking a name on a ramblock, but encoding a guest PA
offset turns this into a supported ABI which I'm not willing to do.
A one line change is one thing, but not a complex new option that
introduces an ABI only for a proprietary product that's jumping through hoops
to keep
from contributing useful logic to QEMU.
Regards,
Anthony Liguori
> ---
> cpu-all.h | 5 ++++
> exec.c | 60 ++++++++++++++++++++++++++++++++++--------------------
> qemu-config.c | 26 +++++++++++++++++++++++
> qemu-options.hx | 24 +++++++++++++++++++--
> vl.c | 43 +++++++++++++++++++++++++++++++++++++-
> 5 files changed, 131 insertions(+), 27 deletions(-)
>
> diff --git a/cpu-all.h b/cpu-all.h
> index c9c51b8..a83f71e 100644
> --- a/cpu-all.h
> +++ b/cpu-all.h
> @@ -500,6 +500,11 @@ typedef struct RAMList {
> extern RAMList ram_list;
>
> extern const char *mem_path;
> +extern int mem_temp;
> +extern int mem_create;
> +extern int mem_fallback;
> +extern int mem_shared;
> +extern int mem_hugetlbfs;
> extern int mem_prealloc;
>
> /* Flags stored in the low bits of the TLB virtual address. These are
> diff --git a/exec.c b/exec.c
> index 8435de0..75b146c 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2346,7 +2346,7 @@ void qemu_flush_coalesced_mmio_buffer(void)
>
> #define HUGETLBFS_MAGIC 0x958458f6
>
> -static long gethugepagesize(const char *path)
> +static long getfspagesize(const char *path, int want_hugetlbfs)
> {
> struct statfs fs;
> int ret;
> @@ -2360,7 +2360,7 @@ static long gethugepagesize(const char *path)
> return 0;
> }
>
> - if (fs.f_type != HUGETLBFS_MAGIC)
> + if (want_hugetlbfs && fs.f_type != HUGETLBFS_MAGIC)
> fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
>
> return fs.f_bsize;
> @@ -2373,17 +2373,15 @@ static void *file_ram_alloc(RAMBlock *block,
> char *filename;
> void *area;
> int fd;
> -#ifdef MAP_POPULATE
> int flags;
> -#endif
> - unsigned long hpagesize;
> + unsigned long pagesize;
>
> - hpagesize = gethugepagesize(path);
> - if (!hpagesize) {
> + pagesize = getfspagesize(path, mem_hugetlbfs);
> + if (!pagesize) {
> return NULL;
> }
>
> - if (memory < hpagesize) {
> + if (memory < pagesize) {
> return NULL;
> }
>
> @@ -2392,20 +2390,35 @@ static void *file_ram_alloc(RAMBlock *block,
> return NULL;
> }
>
> - if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
> + if (asprintf(&filename, "%s/qemu_back_mem.%"PRIx64"+%"PRIx64".%s",
> + path, block->offset, memory, block->mr->name) == -1) {
> return NULL;
> }
>
> - fd = mkstemp(filename);
> + if (mem_temp) {
> + if (asprintf(&filename, "%s.XXXXXX", filename) == -1) {
> + return NULL;
> + }
> + fd = mkstemp(filename);
> + } else {
> + flags = O_RDWR | O_CLOEXEC | (mem_create ? O_CREAT : 0);
> + fd = open(filename, flags, 0600);
> + }
> +
> if (fd < 0) {
> - perror("unable to create backing store for hugepages");
> + fprintf(stderr, "unable to create backing store %s "
> + "for guest memory: %s\n", filename, strerror(errno));
> free(filename);
> return NULL;
> }
> - unlink(filename);
> +
> + if (mem_temp) {
> + unlink(filename);
> + }
> +
> free(filename);
>
> - memory = (memory+hpagesize-1) & ~(hpagesize-1);
> + memory = (memory+pagesize-1) & ~(pagesize-1);
>
> /*
> * ftruncate is not supported by hugetlbfs in older
> @@ -2416,16 +2429,16 @@ static void *file_ram_alloc(RAMBlock *block,
> if (ftruncate(fd, memory))
> perror("ftruncate");
>
> + flags = mem_shared ? MAP_SHARED : MAP_PRIVATE;
> #ifdef MAP_POPULATE
> /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
> - * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
> + * MAP_PRIVATE is requested. For mem_prealloc we always mmap as
> MAP_SHARED
> * to sidestep this quirk.
> */
> - flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
> - area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> -#else
> - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> + if (mem_prealloc)
> + flags = MAP_POPULATE | MAP_SHARED;
> #endif
> + area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> if (area == MAP_FAILED) {
> perror("file_ram_alloc: can't mmap RAM pages");
> close(fd);
> @@ -2561,6 +2574,10 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size,
> void *host,
> #if defined (__linux__) && !defined(TARGET_S390X)
> new_block->host = file_ram_alloc(new_block, size, mem_path);
> if (!new_block->host) {
> + if (!mem_fallback) {
> + fprintf(stderr, "failed to allocate ram file for %s\n",
> mr->name);
> + exit(1);
> + }
> new_block->host = qemu_vmalloc(size);
> memory_try_enable_merging(new_block->host, size);
> }
> @@ -2675,11 +2692,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t
> length)
> if (mem_path) {
> #if defined(__linux__) && !defined(TARGET_S390X)
> if (block->fd) {
> + flags = mem_shared ? MAP_SHARED : MAP_PRIVATE;
> #ifdef MAP_POPULATE
> - flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
> - MAP_PRIVATE;
> -#else
> - flags |= MAP_PRIVATE;
> + if (mem_prealloc)
> + flags = MAP_POPULATE | MAP_SHARED;
> #endif
> area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
> flags, block->fd, offset);
> diff --git a/qemu-config.c b/qemu-config.c
> index 10d1ba4..45b24d2 100644
> --- a/qemu-config.c
> +++ b/qemu-config.c
> @@ -691,6 +691,31 @@ static QemuOptsList qemu_object_opts = {
> },
> };
>
> +static QemuOptsList qemu_mempath_opts = {
> + .name = "mempath",
> + .implied_opt_name = "path",
> + .head = QTAILQ_HEAD_INITIALIZER(qemu_mempath_opts.head),
> + .desc = {
> + {
> + .name = "path",
> + .type = QEMU_OPT_STRING,
> + },{
> + .name = "mmap",
> + .type = QEMU_OPT_STRING,
> + },{
> + .name = "mode",
> + .type = QEMU_OPT_STRING,
> + },{
> + .name = "anyfs",
> + .type = QEMU_OPT_BOOL,
> + },{
> + .name = "fallback",
> + .type = QEMU_OPT_BOOL,
> + },
> + { /* end of list */ }
> + },
> +};
> +
> static QemuOptsList *vm_config_groups[32] = {
> &qemu_drive_opts,
> &qemu_chardev_opts,
> @@ -709,6 +734,7 @@ static QemuOptsList *vm_config_groups[32] = {
> &qemu_sandbox_opts,
> &qemu_add_fd_opts,
> &qemu_object_opts,
> + &qemu_mempath_opts,
> NULL,
> };
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 9bb29d3..8972397 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -443,10 +443,28 @@ gigabytes respectively.
> ETEXI
>
> DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> - "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
> + "-mem-path
> path[,mmap=private|shared][,mode=temp|open|create][,nofallback][,anyfs]\n"
> + " provide backing storage for guest RAM\n",
> + QEMU_ARCH_ALL)
> STEXI
> address@hidden -mem-path @var{path}
> -Allocate guest RAM from a temporarily created file in @var{path}.
> address@hidden -mem-path
> @var{path}[,mmap=private|shared][,mode=temp|open|create][,nofallback][,anyfs]
> +Allocate guest RAM from files in @var{path}.
> address@hidden mmap=private|shared
> +Specifies how backing files are mmap'd. "private" indicates a COW mapping,
> thus
> +leaving the underlying file unchanged. "shared" indicates a write-through
> +mapping, thus reflecting the guest's memory in the underlying file. Default
> is
> +private.
> address@hidden mode=temp|open|create
> +Determines how backing files are created and opened. "temp" indicates that
> +unique temporary files are created in @var{path} and unlinked after opening.
> +Both "create" and "open" indicate that deterministic names are used and the
> +files aren't unlinked. The The difference between "create" and "open" is that
> +"open" expects the files to already exist. Default is temp.
> address@hidden nofallback
> +Fail if backing files cannot be used for guest RAM (e.g., permission error,
> bad
> +path). By default, RAM allocation falls back to normal method.
> address@hidden anyfs
> +Suppressing warnings if @var{path} isn't on a hugetlbfs filesystem.
> ETEXI
>
> #ifdef MAP_POPULATE
> diff --git a/vl.c b/vl.c
> index c8e9c78..3b7cea7 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -185,6 +185,11 @@ static int display_remote;
> const char* keyboard_layout = NULL;
> ram_addr_t ram_size;
> const char *mem_path = NULL;
> +int mem_shared = 0;
> +int mem_hugetlbfs = 1;
> +int mem_temp = 1;
> +int mem_create = 1;
> +int mem_fallback = 1;
> #ifdef MAP_POPULATE
> int mem_prealloc = 0; /* force preallocation of physical target memory */
> #endif
> @@ -2938,9 +2943,43 @@ int main(int argc, char **argv, char **envp)
> }
> break;
> }
> - case QEMU_OPTION_mempath:
> - mem_path = optarg;
> + case QEMU_OPTION_mempath: {
> + const char *value;
> + opts = qemu_opts_parse(qemu_find_opts("mempath"), optarg, 1);
> + if (!opts) {
> + exit(1);
> + }
> +
> + mem_path = qemu_opt_get(opts, "path");
> + mem_hugetlbfs = !qemu_opt_get_bool(opts, "anyfs", 0);
> + mem_fallback = qemu_opt_get_bool(opts, "fallback", 1);
> +
> + value = qemu_opt_get(opts, "mode");
> + if (value == NULL || !strcmp(value, "temp")) {
> + mem_temp = 1;
> + } else {
> + mem_temp = 0;
> + if (!strcmp(value, "create")) {
> + mem_create = 1;
> + } else if (!strcmp(value, "open")) {
> + mem_create = 0;
> + } else {
> + fprintf(stderr, "Invalid -mem-path mode value.\n");
> + exit(1);
> + }
> + }
> +
> + value = qemu_opt_get(opts, "mmap");
> + if (value == NULL || !strcmp(value, "private")) {
> + mem_shared = 0;
> + } else if (!strcmp(value, "shared")) {
> + mem_shared = 1;
> + } else {
> + fprintf(stderr, "Invalid -mem-path mmap value.\n");
> + exit(1);
> + }
> break;
> + }
> #ifdef MAP_POPULATE
> case QEMU_OPTION_mem_prealloc:
> mem_prealloc = 1;
> --
> 1.7.5.4
- Re: [Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministic,
Anthony Liguori <=