qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PULL 23/25] mmap-alloc: fix hugetlbfs misaligned lengt


From: Greg Kurz
Subject: Re: [Qemu-devel] [PULL 23/25] mmap-alloc: fix hugetlbfs misaligned length in ppc64
Date: Mon, 4 Feb 2019 16:15:54 +0100

Hi Michael,

These two patches (22 and 23) from Murilo already got merged with a pull request
from David earlier today.

Cheers,

--
Greg

On Mon, 4 Feb 2019 09:44:04 -0500
"Michael S. Tsirkin" <address@hidden> wrote:

> From: Murilo Opsfelder Araujo <address@hidden>
> 
> The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
> fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
> 
> However, we still need to consider the underlying huge page size
> during munmap() because it requires that both address and length be a
> multiple of the underlying huge page size for Huge TLB mappings.
> Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
> section of the munmap(2) manual:
> 
>   "For munmap(), addr and length must both be a multiple of the
>   underlying huge page size."
> 
> On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
> mappings because the mapped segment can be aligned with the underlying
> huge page size, not aligned with the native system page size, as
> returned by getpagesize().
> 
> This has the side effect of not releasing huge pages back to the pool
> after a hugetlbfs file-backed memory device is hot-unplugged.
> 
> This patch fixes the situation in qemu_ram_mmap() and
> qemu_ram_munmap() by considering the underlying page size on ppc64.
> 
> After this patch, memory hot-unplug releases huge pages back to the
> pool.
> 
> Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
> Signed-off-by: Murilo Opsfelder Araujo <address@hidden>
> Reviewed-by: Michael S. Tsirkin <address@hidden>
> Signed-off-by: Michael S. Tsirkin <address@hidden>
> Reviewed-by: Greg Kurz <address@hidden>
> ---
>  include/qemu/mmap-alloc.h |  2 +-
>  exec.c                    |  4 ++--
>  util/mmap-alloc.c         | 22 ++++++++++++++++------
>  util/oslib-posix.c        |  2 +-
>  4 files changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> index 50385e3f81..ef04f0ed5b 100644
> --- a/include/qemu/mmap-alloc.h
> +++ b/include/qemu/mmap-alloc.h
> @@ -9,6 +9,6 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
>  
>  void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
>  
> -void qemu_ram_munmap(void *ptr, size_t size);
> +void qemu_ram_munmap(int fd, void *ptr, size_t size);
>  
>  #endif
> diff --git a/exec.c b/exec.c
> index 25f3938a27..03dd673d36 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1873,7 +1873,7 @@ static void *file_ram_alloc(RAMBlock *block,
>      if (mem_prealloc) {
>          os_mem_prealloc(fd, area, memory, smp_cpus, errp);
>          if (errp && *errp) {
> -            qemu_ram_munmap(area, memory);
> +            qemu_ram_munmap(fd, area, memory);
>              return NULL;
>          }
>      }
> @@ -2394,7 +2394,7 @@ static void reclaim_ramblock(RAMBlock *block)
>          xen_invalidate_map_cache_entry(block->host);
>  #ifndef _WIN32
>      } else if (block->fd >= 0) {
> -        qemu_ram_munmap(block->host, block->max_length);
> +        qemu_ram_munmap(block->fd, block->host, block->max_length);
>          close(block->fd);
>  #endif
>      } else {
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index f71ea038c8..8565885420 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -80,6 +80,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool 
> shared)
>      int flags;
>      int guardfd;
>      size_t offset;
> +    size_t pagesize;
>      size_t total;
>      void *guardptr;
>      void *ptr;
> @@ -100,7 +101,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, 
> bool shared)
>       * anonymous memory is OK.
>       */
>      flags = MAP_PRIVATE;
> -    if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
> +    pagesize = qemu_fd_getpagesize(fd);
> +    if (fd == -1 || pagesize == getpagesize()) {
>          guardfd = -1;
>          flags |= MAP_ANONYMOUS;
>      } else {
> @@ -109,6 +111,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, 
> bool shared)
>      }
>  #else
>      guardfd = -1;
> +    pagesize = getpagesize();
>      flags = MAP_PRIVATE | MAP_ANONYMOUS;
>  #endif
>  
> @@ -120,7 +123,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, 
> bool shared)
>  
>      assert(is_power_of_2(align));
>      /* Always align to host page size */
> -    assert(align >= getpagesize());
> +    assert(align >= pagesize);
>  
>      flags = MAP_FIXED;
>      flags |= fd == -1 ? MAP_ANONYMOUS : 0;
> @@ -143,17 +146,24 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, 
> bool shared)
>       * a guard page guarding against potential buffer overflows.
>       */
>      total -= offset;
> -    if (total > size + getpagesize()) {
> -        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> +    if (total > size + pagesize) {
> +        munmap(ptr + size + pagesize, total - size - pagesize);
>      }
>  
>      return ptr;
>  }
>  
> -void qemu_ram_munmap(void *ptr, size_t size)
> +void qemu_ram_munmap(int fd, void *ptr, size_t size)
>  {
> +    size_t pagesize;
> +
>      if (ptr) {
>          /* Unmap both the RAM block and the guard page */
> -        munmap(ptr, size + getpagesize());
> +#if defined(__powerpc64__) && defined(__linux__)
> +        pagesize = qemu_fd_getpagesize(fd);
> +#else
> +        pagesize = getpagesize();
> +#endif
> +        munmap(ptr, size + pagesize);
>      }
>  }
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 4ce1ba9ca4..37c5854b9c 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -226,7 +226,7 @@ void qemu_vfree(void *ptr)
>  void qemu_anon_ram_free(void *ptr, size_t size)
>  {
>      trace_qemu_anon_ram_free(ptr, size);
> -    qemu_ram_munmap(ptr, size);
> +    qemu_ram_munmap(-1, ptr, size);
>  }
>  
>  void qemu_set_block(int fd)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]