qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministi


From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministic
Date: Mon, 07 Jan 2013 13:55:54 -0600
User-agent: Notmuch/0.13.2+93~ged93d79 (http://notmuchmail.org) Emacs/23.3.1 (x86_64-pc-linux-gnu)

Peter Feiner <address@hidden> writes:

> This patch makes the -mem-path filenames deterministic and allows some control
> over how QEMU mmaps the files. Given this control, QEMU can be used to 
> implement
> exogenous memory management techniques quite simply. Two examples follow:
>
>     1. Post-copy migration (mmap=shared for origin, save device state, path in
>        NFS available on both hosts, mmap=shared for destination).
>     2. memory sharing via VM cloning (mmap=shared for master, save device 
> state,
>        then mmap=private for clones).
>
> These new features are exposed via arguments to -mem-path:
>
> -mem-path \
>     path[,mode=temp|open|create][,mmap=private|shared][,nofallback][,anyfs]
>
> The backing file names are made deterministic by including their RAMBlocks'
> offsets, which are unique given a qemu command line. Note that Xen's live
> migration relies on RAMBlocks having the same offsets between QEMU processes
> (see xen_read_physmap). The new file name format is
>
>     qemu_back_mem.OFFSET+SIZE.NAME[.RANDOM]
>
> where SIZE and NAME are added for extra sanity checking when mode="open".
>
> By default, the old -mem-path behavior is preserved. I.e., mode="temp" is 
> used,
> which adds a random suffix to the deterministic name and unlinks the files.
>
> Signed-off-by: Peter Feiner <address@hidden>


This is not reasonable IMHO.

I was okay with sticking a name on a ramblock, but encoding a guest PA
offset turns this into a supported ABI which I'm not willing to do.

A one line change is one thing, but not a complex new option that
introduces an ABI only for a proprietary product that's jumping through hoops 
to keep
from contributing useful logic to QEMU.

Regards,

Anthony Liguori

> ---
>  cpu-all.h       |    5 ++++
>  exec.c          |   60 ++++++++++++++++++++++++++++++++++--------------------
>  qemu-config.c   |   26 +++++++++++++++++++++++
>  qemu-options.hx |   24 +++++++++++++++++++--
>  vl.c            |   43 +++++++++++++++++++++++++++++++++++++-
>  5 files changed, 131 insertions(+), 27 deletions(-)
>
> diff --git a/cpu-all.h b/cpu-all.h
> index c9c51b8..a83f71e 100644
> --- a/cpu-all.h
> +++ b/cpu-all.h
> @@ -500,6 +500,11 @@ typedef struct RAMList {
>  extern RAMList ram_list;
>  
>  extern const char *mem_path;
> +extern int mem_temp;
> +extern int mem_create;
> +extern int mem_fallback;
> +extern int mem_shared;
> +extern int mem_hugetlbfs;
>  extern int mem_prealloc;
>  
>  /* Flags stored in the low bits of the TLB virtual address.  These are
> diff --git a/exec.c b/exec.c
> index 8435de0..75b146c 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2346,7 +2346,7 @@ void qemu_flush_coalesced_mmio_buffer(void)
>  
>  #define HUGETLBFS_MAGIC       0x958458f6
>  
> -static long gethugepagesize(const char *path)
> +static long getfspagesize(const char *path, int want_hugetlbfs)
>  {
>      struct statfs fs;
>      int ret;
> @@ -2360,7 +2360,7 @@ static long gethugepagesize(const char *path)
>          return 0;
>      }
>  
> -    if (fs.f_type != HUGETLBFS_MAGIC)
> +    if (want_hugetlbfs && fs.f_type != HUGETLBFS_MAGIC)
>          fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
>  
>      return fs.f_bsize;
> @@ -2373,17 +2373,15 @@ static void *file_ram_alloc(RAMBlock *block,
>      char *filename;
>      void *area;
>      int fd;
> -#ifdef MAP_POPULATE
>      int flags;
> -#endif
> -    unsigned long hpagesize;
> +    unsigned long pagesize;
>  
> -    hpagesize = gethugepagesize(path);
> -    if (!hpagesize) {
> +    pagesize = getfspagesize(path, mem_hugetlbfs);
> +    if (!pagesize) {
>          return NULL;
>      }
>  
> -    if (memory < hpagesize) {
> +    if (memory < pagesize) {
>          return NULL;
>      }
>  
> @@ -2392,20 +2390,35 @@ static void *file_ram_alloc(RAMBlock *block,
>          return NULL;
>      }
>  
> -    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
> +    if (asprintf(&filename, "%s/qemu_back_mem.%"PRIx64"+%"PRIx64".%s",
> +                 path, block->offset, memory, block->mr->name) == -1) {
>          return NULL;
>      }
>  
> -    fd = mkstemp(filename);
> +    if (mem_temp) {
> +        if (asprintf(&filename, "%s.XXXXXX", filename) == -1) {
> +            return NULL;
> +        }
> +        fd = mkstemp(filename);
> +    } else {
> +        flags = O_RDWR | O_CLOEXEC | (mem_create ? O_CREAT : 0);
> +        fd = open(filename, flags, 0600);
> +    }
> +
>      if (fd < 0) {
> -        perror("unable to create backing store for hugepages");
> +        fprintf(stderr, "unable to create backing store %s "
> +                "for guest memory: %s\n", filename, strerror(errno));
>          free(filename);
>          return NULL;
>      }
> -    unlink(filename);
> +
> +    if (mem_temp) {
> +        unlink(filename);
> +    }
> +
>      free(filename);
>  
> -    memory = (memory+hpagesize-1) & ~(hpagesize-1);
> +    memory = (memory+pagesize-1) & ~(pagesize-1);
>  
>      /*
>       * ftruncate is not supported by hugetlbfs in older
> @@ -2416,16 +2429,16 @@ static void *file_ram_alloc(RAMBlock *block,
>      if (ftruncate(fd, memory))
>          perror("ftruncate");
>  
> +    flags = mem_shared ? MAP_SHARED : MAP_PRIVATE;
>  #ifdef MAP_POPULATE
>      /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
> -     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
> +     * MAP_PRIVATE is requested.  For mem_prealloc we always mmap as 
> MAP_SHARED
>       * to sidestep this quirk.
>       */
> -    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
> -    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> -#else
> -    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> +    if (mem_prealloc)
> +        flags = MAP_POPULATE | MAP_SHARED;
>  #endif
> +    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
>      if (area == MAP_FAILED) {
>          perror("file_ram_alloc: can't mmap RAM pages");
>          close(fd);
> @@ -2561,6 +2574,10 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, 
> void *host,
>  #if defined (__linux__) && !defined(TARGET_S390X)
>              new_block->host = file_ram_alloc(new_block, size, mem_path);
>              if (!new_block->host) {
> +                if (!mem_fallback) {
> +                    fprintf(stderr, "failed to allocate ram file for %s\n", 
> mr->name);
> +                    exit(1);
> +                }
>                  new_block->host = qemu_vmalloc(size);
>                  memory_try_enable_merging(new_block->host, size);
>              }
> @@ -2675,11 +2692,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t 
> length)
>                  if (mem_path) {
>  #if defined(__linux__) && !defined(TARGET_S390X)
>                      if (block->fd) {
> +                        flags = mem_shared ? MAP_SHARED : MAP_PRIVATE;
>  #ifdef MAP_POPULATE
> -                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
> -                            MAP_PRIVATE;
> -#else
> -                        flags |= MAP_PRIVATE;
> +                        if (mem_prealloc)
> +                            flags = MAP_POPULATE | MAP_SHARED;
>  #endif
>                          area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
>                                      flags, block->fd, offset);
> diff --git a/qemu-config.c b/qemu-config.c
> index 10d1ba4..45b24d2 100644
> --- a/qemu-config.c
> +++ b/qemu-config.c
> @@ -691,6 +691,31 @@ static QemuOptsList qemu_object_opts = {
>      },
>  };
>  
> +static QemuOptsList qemu_mempath_opts = {
> +    .name = "mempath",
> +    .implied_opt_name = "path",
> +    .head = QTAILQ_HEAD_INITIALIZER(qemu_mempath_opts.head),
> +    .desc = {
> +        {
> +            .name = "path",
> +            .type = QEMU_OPT_STRING,
> +        },{
> +            .name = "mmap",
> +            .type = QEMU_OPT_STRING,
> +        },{
> +            .name = "mode",
> +            .type = QEMU_OPT_STRING,
> +        },{
> +            .name = "anyfs",
> +            .type = QEMU_OPT_BOOL,
> +        },{
> +            .name = "fallback",
> +            .type = QEMU_OPT_BOOL,
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
>  static QemuOptsList *vm_config_groups[32] = {
>      &qemu_drive_opts,
>      &qemu_chardev_opts,
> @@ -709,6 +734,7 @@ static QemuOptsList *vm_config_groups[32] = {
>      &qemu_sandbox_opts,
>      &qemu_add_fd_opts,
>      &qemu_object_opts,
> +    &qemu_mempath_opts,
>      NULL,
>  };
>  
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 9bb29d3..8972397 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -443,10 +443,28 @@ gigabytes respectively.
>  ETEXI
>  
>  DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> -    "-mem-path FILE  provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
> +    "-mem-path 
> path[,mmap=private|shared][,mode=temp|open|create][,nofallback][,anyfs]\n"
> +    "          provide backing storage for guest RAM\n",
> +    QEMU_ARCH_ALL)
>  STEXI
> address@hidden -mem-path @var{path}
> -Allocate guest RAM from a temporarily created file in @var{path}.
> address@hidden -mem-path 
> @var{path}[,mmap=private|shared][,mode=temp|open|create][,nofallback][,anyfs]
> +Allocate guest RAM from files in @var{path}.
> address@hidden mmap=private|shared
> +Specifies how backing files are mmap'd. "private" indicates a COW mapping, 
> thus
> +leaving the underlying file unchanged. "shared" indicates a write-through
> +mapping, thus reflecting the guest's memory in the underlying file. Default 
> is
> +private.
> address@hidden mode=temp|open|create
> +Determines how backing files are created and opened. "temp" indicates that
> +unique temporary files are created in @var{path} and unlinked after opening.
> +Both "create" and "open" indicate that deterministic names are used and the
> +files aren't unlinked. The The difference between "create" and "open" is that
> +"open" expects the files to already exist. Default is temp.
> address@hidden nofallback
> +Fail if backing files cannot be used for guest RAM (e.g., permission error, 
> bad
> +path). By default, RAM allocation falls back to normal method.
> address@hidden anyfs
> +Suppressing warnings if @var{path} isn't on a hugetlbfs filesystem.
>  ETEXI
>  
>  #ifdef MAP_POPULATE
> diff --git a/vl.c b/vl.c
> index c8e9c78..3b7cea7 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -185,6 +185,11 @@ static int display_remote;
>  const char* keyboard_layout = NULL;
>  ram_addr_t ram_size;
>  const char *mem_path = NULL;
> +int mem_shared = 0;
> +int mem_hugetlbfs = 1;
> +int mem_temp = 1;
> +int mem_create = 1;
> +int mem_fallback = 1;
>  #ifdef MAP_POPULATE
>  int mem_prealloc = 0; /* force preallocation of physical target memory */
>  #endif
> @@ -2938,9 +2943,43 @@ int main(int argc, char **argv, char **envp)
>                  }
>                  break;
>              }
> -            case QEMU_OPTION_mempath:
> -                mem_path = optarg;
> +            case QEMU_OPTION_mempath: {
> +                const char *value;
> +                opts = qemu_opts_parse(qemu_find_opts("mempath"), optarg, 1);
> +                if (!opts) {
> +                    exit(1);
> +                }
> +
> +                mem_path = qemu_opt_get(opts, "path");
> +                mem_hugetlbfs = !qemu_opt_get_bool(opts, "anyfs", 0);
> +                mem_fallback = qemu_opt_get_bool(opts, "fallback", 1);
> +
> +                value = qemu_opt_get(opts, "mode");
> +                if (value == NULL || !strcmp(value, "temp")) {
> +                    mem_temp = 1;
> +                } else {
> +                    mem_temp = 0;
> +                    if (!strcmp(value, "create")) {
> +                        mem_create = 1;
> +                    } else if (!strcmp(value, "open")) {
> +                        mem_create = 0;
> +                    } else {
> +                        fprintf(stderr, "Invalid -mem-path mode value.\n");
> +                        exit(1);
> +                    }
> +                }
> +
> +                value = qemu_opt_get(opts, "mmap");
> +                if (value == NULL || !strcmp(value, "private")) {
> +                    mem_shared = 0;
> +                } else if (!strcmp(value, "shared")) {
> +                    mem_shared = 1;
> +                } else {
> +                    fprintf(stderr, "Invalid -mem-path mmap value.\n");
> +                    exit(1);
> +                }
>                  break;
> +            }
>  #ifdef MAP_POPULATE
>              case QEMU_OPTION_mem_prealloc:
>                  mem_prealloc = 1;
> -- 
> 1.7.5.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]