[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#14404: regexp_exec thread-unsafe
From: Ludovic Courtès
Subject: bug#14404: regexp_exec thread-unsafe
Date: Tue, 14 May 2013 23:21:43 +0200
User-agent: Gnus/5.130005 (Ma Gnus v0.5) Emacs/24.3 (gnu/linux)
Hello,
Guile imports the Gnulib ‘regexp’ module, and for some reason it gets
compiled even on glibc systems.
The problem is that Gnulib’s ‘regex_internal.h’ reads as follows:
#if defined _LIBC
# include <bits/libc-lock.h>
#else
# define __libc_lock_init(NAME) do { } while (0)
# define __libc_lock_lock(NAME) do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif
and ‘regexp_exec’ happily does:
__libc_lock_lock (dfa->lock);
if (preg->no_sub)
err = re_search_internal (preg, string, length, start, length,
length, 0, NULL, eflags);
else
err = re_search_internal (preg, string, length, start, length,
length, nmatch, pmatch, eflags);
__libc_lock_unlock (dfa->lock);
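Obviously that doesn’t work well: when _LIBC is not defined, the lock macros expand to nothing, so the calls above provide no locking at all. This caused a regression in Guile.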
With MALLOC_CHECK_=2 and a Guile program, it aborts like this (sorry, I
failed to come up with a reduced test case):
--8<---------------cut here---------------start------------->8---
(gdb) bt full
#0 0x00007f793608d2b5 in raise () from /home/ludo/.guix-profile/lib/libc.so.6
No symbol table info available.
#1 0x00007f793608e738 in abort () from /home/ludo/.guix-profile/lib/libc.so.6
No symbol table info available.
#2 0x00007f79360d2afe in malloc_printerr () from
/home/ludo/.guix-profile/lib/libc.so.6
No symbol table info available.
#3 0x00007f79360d5bf5 in realloc_check () from
/home/ludo/.guix-profile/lib/libc.so.6
No symbol table info available.
#4 0x00007f7937a6efed in register_state (dfa=0x10872c0, address@hidden,
address@hidden)
at regex_internal.c:1590
new_alloc = 8
new_array = 0x0
spot = 0x11981c0
i = <optimized out>
#5 0x00007f7937a72373 in create_cd_newstate (hash=<optimized out>, context=1,
nodes=0x12, dfa=0x1c6dce0) at regex_internal.c:1734
i = <optimized out>
nctx_nodes = <optimized out>
err = <optimized out>
newstate = 0x1c6dce0
#6 re_acquire_state_context (address@hidden, address@hidden, address@hidden,
address@hidden) at regex_internal.c:1554
hash = 6
spot = <optimized out>
i = <optimized out>
#7 0x00007f79379bb54b in build_trtable (dfa=<optimized out>, address@hidden)
at regexec.c:3474
next_node = <optimized out>
err = _REG_NOERROR
i = 1
j = 2
ch = <optimized out>
need_word_trtable = true
elem = <optimized out>
mask = <optimized out>
dests_node_malloced = true
dest_states_malloced = true
trtable = <optimized out>
dest_states = 0x1c688f0
dest_states_word = 0x1c68918
dest_states_nl = 0x1c68940
follows = {alloc = 74, nelem = 18, elems = 0x1c5d4e0}
dests_node = 0x1c642b0
dests_ch = 0x1c65ab0
acceptable = {288063250384289792, 576460743847706622, 0, 0}
dests_alloc = 0x1c642b0
#8 0x00007f7937a77714 in transit_state (state=0x1c44820, mctx=0x7fff67bda000,
err=0x7fff67bd9fb0) at regexec.c:2339
trtable = <optimized out>
ch = 116 't'
#9 check_matching (p_match_first=0x7fff67bd9fb8, fl_longest_match=true,
mctx=0x7fff67bda000) at regexec.c:1187
old_state = 0x1c44820
next_char_idx = <optimized out>
dfa = <optimized out>
err = _REG_NOERROR
match = 0
match_last = <optimized out>
next_start_idx = 0
cur_str_idx = <optimized out>
cur_state = 0x1c44820
at_init_state = <optimized out>
#10 re_search_internal (preg=0x1256700, string=0x1c612d0
"http://hydra.gnu.org/nar/0did57blsaaspj49cl3q8nnydm5pr1qg-cflow-1.4",
length=67, start=0, last_start=0, stop=<optimized out>, nmatch=9,
pmatch=0x1c63d50, eflags=0) at regexec.c:856
err = _REG_NOERROR
dfa = 0x10872c0
left_lim = 0
right_lim = 0
incr = 1
fl_longest_match = <optimized out>
match_kind = 8
match_first = 0
match_last = <optimized out>
extra_nmatch = 0
sb = false
mctx = {input = {raw_mbs = 0x1c612d0
"http://hydra.gnu.org/nar/0did57blsaaspj49cl3q8nnydm5pr1qg-cflow-1.4",
mbs = 0x1c612d0
"http://hydra.gnu.org/nar/0did57blsaaspj49cl3q8nnydm5pr1qg-cflow-1.4", wcs =
0x1c63f50,
offsets = 0x0, cur_state = {__count = 0, __value = {__wch = 0,
__wchb = "\000\000\000"}}, raw_mbs_idx = 0,
valid_len = 67, valid_raw_len = 67, bufs_len = 68, cur_idx = 2,
raw_len = 67, len = 67, raw_stop = 67, stop = 67,
tip_context = 6, trans = 0x0, word_char = 0x1087378, icase = 0
'\000', is_utf8 = 1 '\001', map_notascii = 0 '\000',
mbs_allocated = 0 '\000', offsets_needed = 0 '\000', newline_anchor
= 0 '\000', word_ops_used = 0 '\000',
mb_cur_max = 6}, dfa = 0x10872c0, eflags = 0, match_last =
18446744073709551615, last_node = 0,
state_log = 0x1c64070, state_log_top = 1, nbkref_ents = 0,
abkref_ents = 0, bkref_ents = 0x0, max_mb_elem_len = 0,
nsub_tops = 0, asub_tops = 0, sub_tops = 0x0}
fastmap = 0x0
t = 0x0
#11 0x00007f7937a7b5c0 in rpl_regexec (preg=0x1256700, string=0x3399 <Address
0x3399 out of bounds>,
address@hidden
"http://hydra.gnu.org/nar/0did57blsaaspj49cl3q8nnydm5pr1qg-cflow-1.4",
nmatch=6, address@hidden,
pmatch=0xffffffffffffffff, address@hidden, eflags=0) at regexec.c:254
err = _REG_NOERROR
start = 13209
length = 0
#12 0x00007f7937a68873 in scm_regexp_exec (rx=0x1836090, str=0x1806b00,
start=<optimized out>, flags=0x2) at regex-posix.c:288
status = <optimized out>
nmatches = 9
offset = 0
matches = 0x1c63d50
c_str = 0x1c612d0
"http://hydra.gnu.org/nar/0did57blsaaspj49cl3q8nnydm5pr1qg-cflow-1.4"
mvec = 0x4
substr = <optimized out>
--8<---------------cut here---------------end--------------->8---
How should that be fixed? Shouldn’t __libc_lock_unlock & co. be rebased
on top of pthread_mutex_t?
Thanks,
Ludo’.
- bug#14404: regexp_exec thread-unsafe,
Ludovic Courtès <=