Hi!
Could someone help me with the following please?
We have a crash in a v5.2.0-based downstream branch. I don't have a reliable
reproducer, nor can I find any significant differences from the upstream code
in the functions involved.
#0 0x00007fc2a8dbfe7f in raise () from /lib64/libc.so.6
#1 0x00007fc2a8daa8b5 in abort () from /lib64/libc.so.6
#2 0x00007fc2a8daa789 in __assert_fail_base.cold.0 () from /lib64/libc.so.6
#3 0x00007fc2a8db8576 in __assert_fail () from /lib64/libc.so.6
#4 0x000055fc1d76e195 in aio_ctx_finalize (source=<optimized out>) at
../util/async.c:343
#5 aio_ctx_finalize (source=0x55fc1f9cebf0) at ../util/async.c:311
#6 0x00007fc2aa335789 in g_source_unref_internal () from
/lib64/libglib-2.0.so.0
#7 0x00007fc2aa335a0e in g_source_iter_next () from /lib64/libglib-2.0.so.0
#8 0x00007fc2aa336b35 in g_main_context_unref () from /lib64/libglib-2.0.so.0
#9 0x00007fc2aa338d9c in g_main_loop_unref () from /lib64/libglib-2.0.so.0
#10 0x000055fc1d666094 in iothread_instance_finalize (obj=<optimized out>) at
../iothread.c:145
#11 0x000055fc1d644e19 in object_deinit (type=0x55fc1f7db490, obj=<optimized
out>) at ../qom/object.c:671
#12 object_finalize (data=0x55fc1f88da00) at ../qom/object.c:685
#13 object_unref (objptr=0x55fc1f88da00) at ../qom/object.c:1183
#14 0x000055fc1d643365 in object_property_del_child (obj=0x55fc1f9a80e0,
child=0x55fc1f88da00) at ../qom/object.c:645
#15 0x000055fc1d644618 in object_unparent (obj=<optimized out>) at
../qom/object.c:664
#16 0x000055fc1d6661d9 in iothread_destroy (iothread=<optimized out>) at
../iothread.c:369
#17 0x000055fc1d6ec5d9 in monitor_cleanup () at ../monitor/monitor.c:670
#18 0x000055fc1d63584b in qemu_cleanup () at ../softmmu/vl.c:4562
#19 0x000055fc1d374307 in main (argc=<optimized out>, argv=<optimized out>,
envp=<optimized out>) at ../softmmu/main.c:51
(gdb) fr 10
#10 0x000055fc1d666094 in iothread_instance_finalize (obj=<optimized out>) at
../iothread.c:145
145 g_main_loop_unref(iothread->main_loop);
(gdb) list
140 iothread->ctx = NULL;
141 }
142 if (iothread->worker_context) {
143 g_main_context_unref(iothread->worker_context);
144 iothread->worker_context = NULL;
145 g_main_loop_unref(iothread->main_loop);
146 iothread->main_loop = NULL;
147 }
148 qemu_sem_destroy(&iothread->init_done_sem);
149 }
(gdb) p iothread
$24 = (IOThread *) 0x55fc1f88da00
(gdb) p mon_iothread
$25 = (IOThread *) 0x55fc1f88da00
(gdb) p *mon_iothread
$26 = {parent_obj = {class = 0x55fc1f92b4e0, free = 0x7fc2aa33e3a0 <g_free>,
properties = 0x55fc1f9ad980, ref = 0, parent = 0x0}, thread = {thread =
140473870030592}, ctx = 0x0, run_gcontext = true,
worker_context = 0x0, main_loop = 0x55fc1f9a8300, init_done_sem = {lock =
{__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0,
__spins = 0, __elision = 0, __list = {__prev = 0x0,
__next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0},
cond = {__data = {{__wseq = 3, __wseq32 = {__low = 3, __high = 0}}, {__g1_start = 1,
__g1_start32 = {__low = 1, __high = 0}},
__g_refs = {0, 0}, __g_size = {0, 0}, __g1_orig_size = 4, __wrefs = 0,
__g_signals = {0, 0}},
__size = "\003\000\000\000\000\000\000\000\001", '\000' <repeats 23 times>,
"\004", '\000' <repeats 14 times>, __align = 3}, count = 0, initialized = true}, stopping =
true, running = false,
thread_id = 10141, poll_max_ns = 32768, poll_grow = 0, poll_shrink = 0}
(gdb) info thr
Id Target Id Frame
* 1 Thread 0x7fc2a4a19f00 (LWP 10134) 0x000055fc1d666094 in
iothread_instance_finalize (obj=<optimized out>) at ../iothread.c:145
2 Thread 0x7fc2a4a16700 (LWP 10136) 0x00007fc2a8e8002d in syscall () from
/lib64/libc.so.6
3 Thread 0x7fc29e9f8700 (LWP 10143) 0x00007fc2a8f5e65c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
4 Thread 0x7fc29e1f7700 (LWP 10144) 0x00007fc2a8f5e65c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
5 Thread 0x7fc2027ff700 (LWP 10147) 0x00007fc2a8f5e65c in
pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
6 Thread 0x7fc29ffff700 (LWP 10137) 0x00007fc2a8f5e9aa in
pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
7 Thread 0x7fc20301a700 (LWP 10146) 0x00007fc2a8e7a541 in poll () from
/lib64/libc.so.6
So, it's about mon_iothread, and it has already finished by the time of the
crash (this seems correct, since earlier in iothread_instance_finalize() there
is iothread_stop(), which does qemu_thread_join()).
Now, what crashed:
(gdb) fr 4
#4 0x000055fc1d76e195 in aio_ctx_finalize (source=<optimized out>) at
../util/async.c:343
343 assert(flags & BH_DELETED);
(gdb) list
338 /* There must be no aio_bh_poll() calls going on */
339 assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
340
341 while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
342 /* qemu_bh_delete() must have been called on BHs in this
AioContext */
343 assert(flags & BH_DELETED);
344
345 g_free(bh);
346 }
347
(gdb) p flags
$1 = 11
(gdb) # BH_ONESHOT | BH_SCHEDULED | BH_PENDING
So, there is an unfinished BH in the context while the thread has already
finished. Does the iothread have its own AioContext, or may it use
qemu_aio_context? Looking at iothread_run(), I can't understand what prevents
creating a BH after the iothread finishes.
So, what prevents the following situation:
1. iothread->running is set to false, so we leave the while loop in
iothread_run()
2. right after that (from some other thread) we schedule a BH into the
AioContext of the now-finished iothread?