qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PULL 15/36] memory: fix race between TCG and accesses


From: Pavel Dovgalyuk
Subject: Re: [Qemu-devel] [PULL 15/36] memory: fix race between TCG and accesses to dirty bitmap
Date: Thu, 12 Sep 2019 09:54:09 +0300

Ping.


Pavel Dovgalyuk

> -----Original Message-----
> From: dovgaluk [mailto:address@hidden]
> Sent: Monday, August 26, 2019 3:19 PM
> To: Paolo Bonzini; address@hidden
> Cc: address@hidden; Qemu-devel
> Subject: Re: [Qemu-devel] [PULL 15/36] memory: fix race between TCG and 
> accesses to dirty
> bitmap
> 
> This patch breaks the execution recording.
> While vCPU tries to lock replay mutex in main while loop,
> vga causes dirty memory sync and do_run_on_cpu call.
> This call waits for vCPU to process the work queue.
> 
> Pavel Dovgalyuk
> 
> Paolo Bonzini писал 2019-08-20 09:59:
> > There is a race between TCG and accesses to the dirty log:
> >
> >       vCPU thread                  reader thread
> >       -----------------------      -----------------------
> >       TLB check -> slow path
> >         notdirty_mem_write
> >           write to RAM
> >           set dirty flag
> >                                    clear dirty flag
> >       TLB check -> fast path
> >                                    read memory
> >         write to RAM
> >
> > Fortunately, in order to fix it, no change is required to the
> > vCPU thread.  However, the reader thread must delay the read after
> > the vCPU thread has finished the write.  This can be approximated
> > conservatively by run_on_cpu, which waits for the end of the current
> > translation block.
> >
> > A similar technique is used by KVM, which has to do a synchronous TLB
> > flush after doing a test-and-clear of the dirty-page flags.
> >
> > Reported-by: Dr. David Alan Gilbert <address@hidden>
> > Signed-off-by: Paolo Bonzini <address@hidden>
> > ---
> >  exec.c                | 31 +++++++++++++++++++++++++++++++
> >  include/exec/memory.h | 12 ++++++++++++
> >  memory.c              | 10 +++++++++-
> >  migration/ram.c       |  1 +
> >  4 files changed, 53 insertions(+), 1 deletion(-)
> >
> > diff --git a/exec.c b/exec.c
> > index 3e78de3..ae68f72 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -198,6 +198,7 @@ typedef struct subpage_t {
> >
> >  static void io_mem_init(void);
> >  static void memory_map_init(void);
> > +static void tcg_log_global_after_sync(MemoryListener *listener);
> >  static void tcg_commit(MemoryListener *listener);
> >
> >  static MemoryRegion io_mem_watch;
> > @@ -906,6 +907,7 @@ void cpu_address_space_init(CPUState *cpu, int
> > asidx,
> >      newas->cpu = cpu;
> >      newas->as = as;
> >      if (tcg_enabled()) {
> > +        newas->tcg_as_listener.log_global_after_sync =
> > tcg_log_global_after_sync;
> >          newas->tcg_as_listener.commit = tcg_commit;
> >          memory_listener_register(&newas->tcg_as_listener, as);
> >      }
> > @@ -3143,6 +3145,35 @@ void
> > address_space_dispatch_free(AddressSpaceDispatch *d)
> >      g_free(d);
> >  }
> >
> > +static void do_nothing(CPUState *cpu, run_on_cpu_data d)
> > +{
> > +}
> > +
> > +static void tcg_log_global_after_sync(MemoryListener *listener)
> > +{while (1) {
>          qemu_mutex_unlock_iothread();
>          replay_mutex_lock();
>          qemu_mutex_lock_i
> > +    CPUAddressSpace *cpuas;
> > +
> > +    /* Wait for the CPU to end the current TB.  This avoids the
> > following
> > +     * incorrect race:
> > +     *
> > +     *      vCPU                         migration
> > +     *      ----------------------       -------------------------
> > +     *      TLB check -> slow path
> > +     *        notdirty_mem_write
> > +     *          write to RAM
> > +     *          mark dirty
> > +     *                                   clear dirty flag
> > +     *      TLB check -> fast path
> > +     *                                   read memory
> > +     *        write to RAM
> > +     *
> > +     * by pushing the migration thread's memory read after the vCPU
> > thread has
> > +     * written the memory.
> > +     */
> > +    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
> > +    run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
> > +}
> > +
> >  static void tcg_commit(MemoryListener *listener)
> >  {
> >      CPUAddressSpace *cpuas;
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index bb0961d..b6bcf31 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -419,6 +419,7 @@ struct MemoryListener {
> >      void (*log_clear)(MemoryListener *listener, MemoryRegionSection
> > *section);
> >      void (*log_global_start)(MemoryListener *listener);
> >      void (*log_global_stop)(MemoryListener *listener);
> > +    void (*log_global_after_sync)(MemoryListener *listener);
> >      void (*eventfd_add)(MemoryListener *listener, MemoryRegionSection
> > *section,
> >                          bool match_data, uint64_t data, EventNotifier
> > *e);
> >      void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection
> > *section,
> > @@ -1682,6 +1683,17 @@ MemoryRegionSection
> > memory_region_find(MemoryRegion *mr,
> >  void memory_global_dirty_log_sync(void);
> >
> >  /**
> > + * memory_global_dirty_log_sync: synchronize the dirty log for all
> > memory
> > + *
> > + * Synchronizes the vCPUs with a thread that is reading the dirty
> > bitmap.
> > + * This function must be called after the dirty log bitmap is cleared,
> > and
> > + * before dirty guest memory pages are read.  If you are using
> > + * #DirtyBitmapSnapshot, memory_region_snapshot_and_clear_dirty()
> > takes
> > + * care of doing this.
> > + */
> > +void memory_global_after_dirty_log_sync(void);
> > +
> > +/**
> >   * memory_region_transaction_begin: Start a transaction.
> >   *
> >   * During a transaction, changes will be accumulated and made visible
> > diff --git a/memory.c b/memory.c
> > index e42d63a..edd0c13 100644
> > --- a/memory.c
> > +++ b/memory.c
> > @@ -2127,9 +2127,12 @@ DirtyBitmapSnapshot
> > *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr,
> >                                                              hwaddr
> > size,
> >                                                              unsigned
> > client)
> >  {
> > +    DirtyBitmapSnapshot *snapshot;
> >      assert(mr->ram_block);
> >      memory_region_sync_dirty_bitmap(mr);
> > -    return cpu_physical_memory_snapshot_and_clear_dirty(mr, addr,
> > size, client);
> > +    snapshot = cpu_physical_memory_snapshot_and_clear_dirty(mr, addr,
> > size, client);
> > +    memory_global_after_dirty_log_sync();
> > +    return snapshot;
> >  }
> >
> >  bool memory_region_snapshot_get_dirty(MemoryRegion *mr,
> > DirtyBitmapSnapshot *snap,
> > @@ -2620,6 +2623,11 @@ void memory_global_dirty_log_sync(void)
> >      memory_region_sync_dirty_bitmap(NULL);
> >  }
> >
> > +void memory_global_after_dirty_log_sync(void)
> > +{
> > +    MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward);
> > +}
> > +
> >  static VMChangeStateEntry *vmstate_change;
> >
> >  void memory_global_dirty_log_start(void)
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 889148d..4e3a6ae 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -1847,6 +1847,7 @@ static void migration_bitmap_sync(RAMState *rs)
> >      rcu_read_unlock();
> >      qemu_mutex_unlock(&rs->bitmap_mutex);
> >
> > +    memory_global_after_dirty_log_sync();
> >      trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
> >
> >      end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]