[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
make strstr multibyte-safe
From: Bruno Haible
Subject: make strstr multibyte-safe
Date: Wed, 17 Aug 2005 17:35:22 +0200
User-agent: KMail/1.5
Hi,
After strcasecmp(), strstr() can also be made to work in multibyte locales.
I committed the appended patch. (Another possible implementation would
have been to call locale_charset() and compare its result to "BIG5", "GBK"
and a few others, to exploit the fact that a UTF-8 string can use the
bytewise search function.)
2005-08-17 Bruno Haible <address@hidden>
* modules/strstr (Files): Add m4/mbrtowc.m4.
(Depends-on): Add mbuiter.
* lib/strstr.h: Ignore HAVE_STRSTR, always declare the gnulib function.
* lib/strstr.c: Completely rewritten, with multibyte locale support.
* m4/strstr.m4 (gl_FUNC_STRSTR): Use the replacement function always.
(gl_PREREQ_STRSTR): Use gl_FUNC_MBRTOWC.
Index: modules/strstr
===================================================================
RCS file: /cvsroot/gnulib/gnulib/modules/strstr,v
retrieving revision 1.5
diff -c -3 -r1.5 strstr
*** modules/strstr 22 Sep 2004 15:11:04 -0000 1.5
--- modules/strstr 17 Aug 2005 14:01:49 -0000
***************
*** 5,12 ****
--- 5,14 ----
lib/strstr.h
lib/strstr.c
m4/strstr.m4
+ m4/mbrtowc.m4
Depends-on:
+ mbuiter
configure.ac:
gl_FUNC_STRSTR
Index: lib/strstr.h
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/strstr.h,v
retrieving revision 1.4
diff -c -3 -r1.4 strstr.h
*** lib/strstr.h 14 May 2005 06:03:58 -0000 1.4
--- lib/strstr.h 17 Aug 2005 14:01:49 -0000
***************
*** 1,5 ****
/* Searching in a string.
! Copyright (C) 2001-2003 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
--- 1,5 ----
/* Searching in a string.
! Copyright (C) 2001-2003, 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
***************
*** 15,27 ****
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
- #if HAVE_STRSTR
-
- /* Get strstr() declaration. */
- #include <string.h>
-
- #else
-
#ifdef __cplusplus
extern "C" {
#endif
--- 15,20 ----
***************
*** 31,36 ****
#ifdef __cplusplus
}
- #endif
-
#endif
--- 24,27 ----
Index: lib/strstr.c
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/strstr.c,v
retrieving revision 1.11
diff -c -3 -r1.11 strstr.c
*** lib/strstr.c 14 May 2005 06:03:58 -0000 1.11
--- lib/strstr.c 17 Aug 2005 14:01:49 -0000
***************
*** 1,119 ****
! /* Copyright (C) 1994, 1999, 2002-2003 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
!
! This program is free software; you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
! the Free Software Foundation; either version 2, or (at your option)
! any later version.
!
! This program is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License
! along with this program; if not, write to the Free Software
! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
!
! /*
! * My personal strstr() implementation that beats most other algorithms.
! * Until someone tells me otherwise, I assume that this is the
! * fastest implementation of strstr() in C.
! * I deliberately chose not to comment it. You should have at least
! * as much fun trying to understand it, as I had to write it :-).
! *
! * Stephen R. van den Berg, address@hidden */
#if HAVE_CONFIG_H
# include <config.h>
#endif
! #include <string.h>
!
! typedef unsigned chartype;
! #undef strstr
char *
! strstr (const char *phaystack, const char *pneedle)
{
! register const unsigned char *haystack, *needle;
! register chartype b, c;
! haystack = (const unsigned char *) phaystack;
! needle = (const unsigned char *) pneedle;
! b = *needle;
! if (b != '\0')
{
! haystack--; /* possible ANSI violation */
! do
{
! c = *++haystack;
! if (c == '\0')
! goto ret0;
! }
! while (c != b);
!
! c = *++needle;
! if (c == '\0')
! goto foundneedle;
! ++needle;
! goto jin;
!
! for (;;)
! {
! register chartype a;
! register const unsigned char *rhaystack, *rneedle;
! do
{
! a = *++haystack;
! if (a == '\0')
! goto ret0;
! if (a == b)
! break;
! a = *++haystack;
! if (a == '\0')
! goto ret0;
! shloop:; }
! while (a != b);
!
! jin: a = *++haystack;
! if (a == '\0')
! goto ret0;
!
! if (a != c)
! goto shloop;
!
! rhaystack = haystack-- + 1;
! rneedle = needle;
! a = *rneedle;
!
! if (*rhaystack == a)
! do
! {
! if (a == '\0')
! goto foundneedle;
! ++rhaystack;
! a = *++needle;
! if (*rhaystack != a)
! break;
! if (a == '\0')
! goto foundneedle;
! ++rhaystack;
! a = *++needle;
! }
! while (*rhaystack == a);
!
! needle = rneedle; /* took the register-poor approach */
!
! if (a == '\0')
! break;
! }
}
- foundneedle:
- return (char*) haystack;
- ret0:
- return 0;
}
--- 1,126 ----
! /* Searching in a string.
! Copyright (C) 2005 Free Software Foundation, Inc.
! Written by Bruno Haible <address@hidden>, 2005.
!
! This program is free software; you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
! the Free Software Foundation; either version 2, or (at your option)
! any later version.
!
! This program is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License
! along with this program; if not, write to the Free Software Foundation,
! Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
! /* Specification. */
! #include "strstr.h"
! #if HAVE_MBRTOWC
! # include "mbuiter.h"
! #endif
+ /* Find the first occurrence of NEEDLE in HAYSTACK.
+ Return a pointer to the start of that occurrence inside HAYSTACK, or
+ NULL if NEEDLE does not occur. An empty NEEDLE matches at the very
+ start of HAYSTACK.
+ When HAVE_MBRTOWC is set and MB_CUR_MAX > 1, both strings are walked
+ one multibyte character at a time with the mbui_* iterators and
+ compared with mb_equal; otherwise they are compared byte by byte. */
char *
! strstr (const char *haystack, const char *needle)
{
! /* Be careful not to look at the entire extent of haystack or needle
! until needed. This is useful because of these two cases:
! - haystack may be very long, and a match of needle found early,
! - needle may be very long, and not even a short initial segment of
! needle may be found in haystack. */
! #if HAVE_MBRTOWC
! if (MB_CUR_MAX > 1)
! {
! mbui_iterator_t iter_needle;
! mbui_init (iter_needle, needle);
! if (mbui_avail (iter_needle))
! {
! mbui_iterator_t iter_haystack;
! mbui_init (iter_haystack, haystack);
! for (;; mbui_advance (iter_haystack))
! {
! if (!mbui_avail (iter_haystack))
! /* No match. */
! return NULL;
!
! if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle)))
! /* The first character matches. */
! {
! mbui_iterator_t rhaystack;
! mbui_iterator_t rneedle;
!
! /* Continue from the candidate position with a copy, so that
! iter_haystack still marks the candidate's start. */
! memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
! mbui_advance (rhaystack);
!
! /* Restart the needle and skip its first character, which has
! already been compared above. */
! mbui_init (rneedle, needle);
! if (!mbui_avail (rneedle))
! abort ();
! mbui_advance (rneedle);
!
! for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
! {
! if (!mbui_avail (rneedle))
! /* Found a match. Return the position the outer iterator
! has reached; the haystack parameter itself is never
! advanced in this branch and still points at the start. */
! return (char *) mbui_cur_ptr (iter_haystack);
! if (!mbui_avail (rhaystack))
! /* No match. */
! return NULL;
! if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle)))
! /* Nothing in this round. */
! break;
! }
! }
! }
! }
! else
! return (char *) haystack;
! }
! else
! #endif
{
! if (*needle != '\0')
{
! /* Speed up the following searches of needle by caching its first
! character. */
! char b = *needle++;
! for (;; haystack++)
{
! if (*haystack == '\0')
! /* No match. */
! return NULL;
! if (*haystack == b)
! /* The first character matches. */
! {
! const char *rhaystack = haystack + 1;
! const char *rneedle = needle;
!
! for (;; rhaystack++, rneedle++)
! {
! if (*rneedle == '\0')
! /* Found a match. */
! return (char *) haystack;
! if (*rhaystack == '\0')
! /* No match. */
! return NULL;
! if (*rhaystack != *rneedle)
! /* Nothing in this round. */
! break;
! }
! }
! }
! }
! else
! return (char *) haystack;
}
}
Index: m4/strstr.m4
===================================================================
RCS file: /cvsroot/gnulib/gnulib/m4/strstr.m4,v
retrieving revision 1.3
diff -c -3 -r1.3 strstr.m4
*** m4/strstr.m4 18 Jan 2005 13:07:56 -0000 1.3
--- m4/strstr.m4 17 Aug 2005 14:01:49 -0000
***************
*** 1,16 ****
! # strstr.m4 serial 2
! dnl Copyright (C) 2002-2003 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_FUNC_STRSTR],
[
! AC_REPLACE_FUNCS(strstr)
! if test $ac_cv_func_strstr = no; then
! gl_PREREQ_STRSTR
! fi
])
# Prerequisites of lib/strstr.c.
! AC_DEFUN([gl_PREREQ_STRSTR], [:])
--- 1,19 ----
! # strstr.m4 serial 3
! dnl Copyright (C) 2002-2003, 2005 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_FUNC_STRSTR],
[
! dnl No known system has a strstr() function that works correctly in
! dnl multibyte locales. Therefore we use our version always.
! AC_LIBOBJ(strstr)
! AC_DEFINE(strstr, rpl_strstr, [Define to rpl_strstr always.])
! gl_PREREQ_STRSTR
])
# Prerequisites of lib/strstr.c.
! AC_DEFUN([gl_PREREQ_STRSTR], [
! gl_FUNC_MBRTOWC
! ])
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- make strstr multibyte-safe,
Bruno Haible <=